1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-attr.h"
43 #include "basic-block.h"
46 #include "target-def.h"
47 #include "langhooks.h"
50 #ifndef CHECK_STACK_LIMIT
51 #define CHECK_STACK_LIMIT (-1)
54 /* Return index of given mode in mult and division cost tables. */
55 #define MODE_INDEX(mode) \
56 ((mode) == QImode ? 0 \
57 : (mode) == HImode ? 1 \
58 : (mode) == SImode ? 2 \
59 : (mode) == DImode ? 3 \
62 /* Processor costs (relative to an add) */
64 struct processor_costs size_cost = { /* costs for tuning for size */
65 2, /* cost of an add instruction */
66 3, /* cost of a lea instruction */
67 2, /* variable shift costs */
68 3, /* constant shift costs */
69 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
70 0, /* cost of multiply per each bit set */
71 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
72 3, /* cost of movsx */
73 3, /* cost of movzx */
76 2, /* cost for loading QImode using movzbl */
77 {2, 2, 2}, /* cost of loading integer registers
78 in QImode, HImode and SImode.
79 Relative to reg-reg move (2). */
80 {2, 2, 2}, /* cost of storing integer registers */
81 2, /* cost of reg,reg fld/fst */
82 {2, 2, 2}, /* cost of loading fp registers
83 in SFmode, DFmode and XFmode */
84 {2, 2, 2}, /* cost of storing fp registers (original comment read "loading integer registers"; this field pairs with the fp load above, like the MMX/SSE load/store pairs below -- verify vs. struct processor_costs) */
85 3, /* cost of moving MMX register */
86 {3, 3}, /* cost of loading MMX registers
87 in SImode and DImode */
88 {3, 3}, /* cost of storing MMX registers
89 in SImode and DImode */
90 3, /* cost of moving SSE register */
91 {3, 3, 3}, /* cost of loading SSE registers
92 in SImode, DImode and TImode */
93 {3, 3, 3}, /* cost of storing SSE registers
94 in SImode, DImode and TImode */
95 3, /* MMX or SSE register to integer */
96 0, /* size of prefetch block */
97 0, /* number of parallel prefetches */
99 2, /* cost of FADD and FSUB insns. */
100 2, /* cost of FMUL instruction. */
101 2, /* cost of FDIV instruction. */
102 2, /* cost of FABS instruction. */
103 2, /* cost of FCHS instruction. */
104 2, /* cost of FSQRT instruction. */
107 /* Processor costs (relative to an add) */
109 struct processor_costs i386_cost = { /* 386 specific costs */
110 1, /* cost of an add instruction */
111 1, /* cost of a lea instruction */
112 3, /* variable shift costs */
113 2, /* constant shift costs */
114 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
115 1, /* cost of multiply per each bit set */
116 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
117 3, /* cost of movsx */
118 2, /* cost of movzx */
119 15, /* "large" insn */
121 4, /* cost for loading QImode using movzbl */
122 {2, 4, 2}, /* cost of loading integer registers
123 in QImode, HImode and SImode.
124 Relative to reg-reg move (2). */
125 {2, 4, 2}, /* cost of storing integer registers */
126 2, /* cost of reg,reg fld/fst */
127 {8, 8, 8}, /* cost of loading fp registers
128 in SFmode, DFmode and XFmode */
129 {8, 8, 8}, /* cost of storing fp registers (original comment read "loading integer registers"; pairs with the fp load above -- verify vs. struct processor_costs) */
130 2, /* cost of moving MMX register */
131 {4, 8}, /* cost of loading MMX registers
132 in SImode and DImode */
133 {4, 8}, /* cost of storing MMX registers
134 in SImode and DImode */
135 2, /* cost of moving SSE register */
136 {4, 8, 16}, /* cost of loading SSE registers
137 in SImode, DImode and TImode */
138 {4, 8, 16}, /* cost of storing SSE registers
139 in SImode, DImode and TImode */
140 3, /* MMX or SSE register to integer */
141 0, /* size of prefetch block */
142 0, /* number of parallel prefetches */
144 23, /* cost of FADD and FSUB insns. */
145 27, /* cost of FMUL instruction. */
146 88, /* cost of FDIV instruction. */
147 22, /* cost of FABS instruction. */
148 24, /* cost of FCHS instruction. */
149 122, /* cost of FSQRT instruction. */
153 struct processor_costs i486_cost = { /* 486 specific costs */
154 1, /* cost of an add instruction */
155 1, /* cost of a lea instruction */
156 3, /* variable shift costs */
157 2, /* constant shift costs */
158 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
159 1, /* cost of multiply per each bit set */
160 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
161 3, /* cost of movsx */
162 2, /* cost of movzx */
163 15, /* "large" insn */
165 4, /* cost for loading QImode using movzbl */
166 {2, 4, 2}, /* cost of loading integer registers
167 in QImode, HImode and SImode.
168 Relative to reg-reg move (2). */
169 {2, 4, 2}, /* cost of storing integer registers */
170 2, /* cost of reg,reg fld/fst */
171 {8, 8, 8}, /* cost of loading fp registers
172 in SFmode, DFmode and XFmode */
173 {8, 8, 8}, /* cost of storing fp registers (original comment read "loading integer registers"; pairs with the fp load above -- verify vs. struct processor_costs) */
174 2, /* cost of moving MMX register */
175 {4, 8}, /* cost of loading MMX registers
176 in SImode and DImode */
177 {4, 8}, /* cost of storing MMX registers
178 in SImode and DImode */
179 2, /* cost of moving SSE register */
180 {4, 8, 16}, /* cost of loading SSE registers
181 in SImode, DImode and TImode */
182 {4, 8, 16}, /* cost of storing SSE registers
183 in SImode, DImode and TImode */
184 3, /* MMX or SSE register to integer */
185 0, /* size of prefetch block */
186 0, /* number of parallel prefetches */
188 8, /* cost of FADD and FSUB insns. */
189 16, /* cost of FMUL instruction. */
190 73, /* cost of FDIV instruction. */
191 3, /* cost of FABS instruction. */
192 3, /* cost of FCHS instruction. */
193 83, /* cost of FSQRT instruction. */
197 struct processor_costs pentium_cost = {
198 1, /* cost of an add instruction */
199 1, /* cost of a lea instruction */
200 4, /* variable shift costs */
201 1, /* constant shift costs */
202 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
203 0, /* cost of multiply per each bit set */
204 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
205 3, /* cost of movsx */
206 2, /* cost of movzx */
207 8, /* "large" insn */
209 6, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {2, 2, 6}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {4, 4, 6}, /* cost of storing fp registers (original comment read "loading integer registers"; pairs with the fp load above -- verify vs. struct processor_costs) */
218 8, /* cost of moving MMX register */
219 {8, 8}, /* cost of loading MMX registers
220 in SImode and DImode */
221 {8, 8}, /* cost of storing MMX registers
222 in SImode and DImode */
223 2, /* cost of moving SSE register */
224 {4, 8, 16}, /* cost of loading SSE registers
225 in SImode, DImode and TImode */
226 {4, 8, 16}, /* cost of storing SSE registers
227 in SImode, DImode and TImode */
228 3, /* MMX or SSE register to integer */
229 0, /* size of prefetch block */
230 0, /* number of parallel prefetches */
232 3, /* cost of FADD and FSUB insns. */
233 3, /* cost of FMUL instruction. */
234 39, /* cost of FDIV instruction. */
235 1, /* cost of FABS instruction. */
236 1, /* cost of FCHS instruction. */
237 70, /* cost of FSQRT instruction. */
241 struct processor_costs pentiumpro_cost = {
242 1, /* cost of an add instruction */
243 1, /* cost of a lea instruction */
244 1, /* variable shift costs */
245 1, /* constant shift costs */
246 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
247 0, /* cost of multiply per each bit set */
248 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
249 1, /* cost of movsx */
250 1, /* cost of movzx */
251 8, /* "large" insn */
253 2, /* cost for loading QImode using movzbl */
254 {4, 4, 4}, /* cost of loading integer registers
255 in QImode, HImode and SImode.
256 Relative to reg-reg move (2). */
257 {2, 2, 2}, /* cost of storing integer registers */
258 2, /* cost of reg,reg fld/fst */
259 {2, 2, 6}, /* cost of loading fp registers
260 in SFmode, DFmode and XFmode */
261 {4, 4, 6}, /* cost of storing fp registers (original comment read "loading integer registers"; pairs with the fp load above -- verify vs. struct processor_costs) */
262 2, /* cost of moving MMX register */
263 {2, 2}, /* cost of loading MMX registers
264 in SImode and DImode */
265 {2, 2}, /* cost of storing MMX registers
266 in SImode and DImode */
267 2, /* cost of moving SSE register */
268 {2, 2, 8}, /* cost of loading SSE registers
269 in SImode, DImode and TImode */
270 {2, 2, 8}, /* cost of storing SSE registers
271 in SImode, DImode and TImode */
272 3, /* MMX or SSE register to integer */
273 32, /* size of prefetch block */
274 6, /* number of parallel prefetches */
276 3, /* cost of FADD and FSUB insns. */
277 5, /* cost of FMUL instruction. */
278 56, /* cost of FDIV instruction. */
279 2, /* cost of FABS instruction. */
280 2, /* cost of FCHS instruction. */
281 56, /* cost of FSQRT instruction. */
285 struct processor_costs k6_cost = {
286 1, /* cost of an add instruction */
287 2, /* cost of a lea instruction */
288 1, /* variable shift costs */
289 1, /* constant shift costs */
290 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
291 0, /* cost of multiply per each bit set */
292 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
293 2, /* cost of movsx */
294 2, /* cost of movzx */
295 8, /* "large" insn */
297 3, /* cost for loading QImode using movzbl */
298 {4, 5, 4}, /* cost of loading integer registers
299 in QImode, HImode and SImode.
300 Relative to reg-reg move (2). */
301 {2, 3, 2}, /* cost of storing integer registers */
302 4, /* cost of reg,reg fld/fst */
303 {6, 6, 6}, /* cost of loading fp registers
304 in SFmode, DFmode and XFmode */
305 {4, 4, 4}, /* cost of storing fp registers (original comment read "loading integer registers"; pairs with the fp load above -- verify vs. struct processor_costs) */
306 2, /* cost of moving MMX register */
307 {2, 2}, /* cost of loading MMX registers
308 in SImode and DImode */
309 {2, 2}, /* cost of storing MMX registers
310 in SImode and DImode */
311 2, /* cost of moving SSE register */
312 {2, 2, 8}, /* cost of loading SSE registers
313 in SImode, DImode and TImode */
314 {2, 2, 8}, /* cost of storing SSE registers
315 in SImode, DImode and TImode */
316 6, /* MMX or SSE register to integer */
317 32, /* size of prefetch block */
318 1, /* number of parallel prefetches */
320 2, /* cost of FADD and FSUB insns. */
321 2, /* cost of FMUL instruction. */
322 56, /* cost of FDIV instruction. */
323 2, /* cost of FABS instruction. */
324 2, /* cost of FCHS instruction. */
325 56, /* cost of FSQRT instruction. */
329 struct processor_costs athlon_cost = {
330 1, /* cost of an add instruction */
331 2, /* cost of a lea instruction */
332 1, /* variable shift costs */
333 1, /* constant shift costs */
334 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
335 0, /* cost of multiply per each bit set */
336 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
337 1, /* cost of movsx */
338 1, /* cost of movzx */
339 8, /* "large" insn */
341 4, /* cost for loading QImode using movzbl */
342 {3, 4, 3}, /* cost of loading integer registers
343 in QImode, HImode and SImode.
344 Relative to reg-reg move (2). */
345 {3, 4, 3}, /* cost of storing integer registers */
346 4, /* cost of reg,reg fld/fst */
347 {4, 4, 12}, /* cost of loading fp registers
348 in SFmode, DFmode and XFmode */
349 {6, 6, 8}, /* cost of storing fp registers (original comment read "loading integer registers"; pairs with the fp load above -- verify vs. struct processor_costs) */
350 2, /* cost of moving MMX register */
351 {4, 4}, /* cost of loading MMX registers
352 in SImode and DImode */
353 {4, 4}, /* cost of storing MMX registers
354 in SImode and DImode */
355 2, /* cost of moving SSE register */
356 {4, 4, 6}, /* cost of loading SSE registers
357 in SImode, DImode and TImode */
358 {4, 4, 5}, /* cost of storing SSE registers
359 in SImode, DImode and TImode */
360 5, /* MMX or SSE register to integer */
361 64, /* size of prefetch block */
362 6, /* number of parallel prefetches */
364 4, /* cost of FADD and FSUB insns. */
365 4, /* cost of FMUL instruction. */
366 24, /* cost of FDIV instruction. */
367 2, /* cost of FABS instruction. */
368 2, /* cost of FCHS instruction. */
369 35, /* cost of FSQRT instruction. */
373 struct processor_costs k8_cost = {
374 1, /* cost of an add instruction */
375 2, /* cost of a lea instruction */
376 1, /* variable shift costs */
377 1, /* constant shift costs */
378 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
379 0, /* cost of multiply per each bit set */
380 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
381 1, /* cost of movsx */
382 1, /* cost of movzx */
383 8, /* "large" insn */
385 4, /* cost for loading QImode using movzbl */
386 {3, 4, 3}, /* cost of loading integer registers
387 in QImode, HImode and SImode.
388 Relative to reg-reg move (2). */
389 {3, 4, 3}, /* cost of storing integer registers */
390 4, /* cost of reg,reg fld/fst */
391 {4, 4, 12}, /* cost of loading fp registers
392 in SFmode, DFmode and XFmode */
393 {6, 6, 8}, /* cost of storing fp registers (original comment read "loading integer registers"; pairs with the fp load above -- verify vs. struct processor_costs) */
394 2, /* cost of moving MMX register */
395 {3, 3}, /* cost of loading MMX registers
396 in SImode and DImode */
397 {4, 4}, /* cost of storing MMX registers
398 in SImode and DImode */
399 2, /* cost of moving SSE register */
400 {4, 3, 6}, /* cost of loading SSE registers
401 in SImode, DImode and TImode */
402 {4, 4, 5}, /* cost of storing SSE registers
403 in SImode, DImode and TImode */
404 5, /* MMX or SSE register to integer */
405 64, /* size of prefetch block */
406 6, /* number of parallel prefetches */
408 4, /* cost of FADD and FSUB insns. */
409 4, /* cost of FMUL instruction. */
410 19, /* cost of FDIV instruction. */
411 2, /* cost of FABS instruction. */
412 2, /* cost of FCHS instruction. */
413 35, /* cost of FSQRT instruction. */
417 struct processor_costs pentium4_cost = {
418 1, /* cost of an add instruction */
419 1, /* cost of a lea instruction */
420 4, /* variable shift costs */
421 4, /* constant shift costs */
422 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
423 0, /* cost of multiply per each bit set */
424 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
425 1, /* cost of movsx */
426 1, /* cost of movzx */
427 16, /* "large" insn */
429 2, /* cost for loading QImode using movzbl */
430 {4, 5, 4}, /* cost of loading integer registers
431 in QImode, HImode and SImode.
432 Relative to reg-reg move (2). */
433 {2, 3, 2}, /* cost of storing integer registers */
434 2, /* cost of reg,reg fld/fst */
435 {2, 2, 6}, /* cost of loading fp registers
436 in SFmode, DFmode and XFmode */
437 {4, 4, 6}, /* cost of storing fp registers (original comment read "loading integer registers"; pairs with the fp load above -- verify vs. struct processor_costs) */
438 2, /* cost of moving MMX register */
439 {2, 2}, /* cost of loading MMX registers
440 in SImode and DImode */
441 {2, 2}, /* cost of storing MMX registers
442 in SImode and DImode */
443 12, /* cost of moving SSE register */
444 {12, 12, 12}, /* cost of loading SSE registers
445 in SImode, DImode and TImode */
446 {2, 2, 8}, /* cost of storing SSE registers
447 in SImode, DImode and TImode */
448 10, /* MMX or SSE register to integer */
449 64, /* size of prefetch block */
450 6, /* number of parallel prefetches */
452 5, /* cost of FADD and FSUB insns. */
453 7, /* cost of FMUL instruction. */
454 43, /* cost of FDIV instruction. */
455 2, /* cost of FABS instruction. */
456 2, /* cost of FCHS instruction. */
457 43, /* cost of FSQRT instruction. */
/* Cost table currently in effect. Defaults to the Pentium table;
   presumably re-pointed at the table matching -mtune during option
   processing -- TODO confirm, that code is outside this chunk. */
460 const struct processor_costs *ix86_cost = &pentium_cost;
462 /* Processor feature/optimization bitmasks. */
463 #define m_386 (1<<PROCESSOR_I386)
464 #define m_486 (1<<PROCESSOR_I486)
465 #define m_PENT (1<<PROCESSOR_PENTIUM)
466 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
467 #define m_K6 (1<<PROCESSOR_K6)
468 #define m_ATHLON (1<<PROCESSOR_ATHLON)
469 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
470 #define m_K8 (1<<PROCESSOR_K8)
471 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
473 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
474 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
475 const int x86_zero_extend_with_and = m_486 | m_PENT;
476 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
477 const int x86_double_with_add = ~m_386;
478 const int x86_use_bit_test = m_386;
479 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
480 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
481 const int x86_3dnow_a = m_ATHLON_K8;
482 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
483 const int x86_branch_hints = m_PENT4;
484 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
485 const int x86_partial_reg_stall = m_PPRO;
486 const int x86_use_loop = m_K6;
487 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
488 const int x86_use_mov0 = m_K6;
489 const int x86_use_cltd = ~(m_PENT | m_K6);
490 const int x86_read_modify_write = ~m_PENT;
491 const int x86_read_modify = ~(m_PENT | m_PPRO);
492 const int x86_split_long_moves = m_PPRO;
493 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
494 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
495 const int x86_single_stringop = m_386 | m_PENT4;
496 const int x86_qimode_math = ~(0);
497 const int x86_promote_qi_regs = 0;
498 const int x86_himode_math = ~(m_PPRO);
499 const int x86_promote_hi_regs = m_PPRO;
500 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
501 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
502 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
503 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
504 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
505 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
506 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
507 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
508 const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
509 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
510 const int x86_decompose_lea = m_PENT4;
511 const int x86_shift1 = ~m_486;
512 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
513 const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
514 /* Set for machines where the type and dependencies are resolved on SSE register
515 parts instead of whole registers, so we may maintain just lower part of
516 scalar values in proper format leaving the upper part undefined. */
517 const int x86_sse_partial_regs = m_ATHLON_K8;
518 /* Athlon optimizes partial-register FPS special case, thus avoiding the
519 need for extra instructions beforehand */
520 const int x86_sse_partial_regs_for_cvtsd2ss = 0;
521 const int x86_sse_typeless_stores = m_ATHLON_K8;
522 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
523 const int x86_use_ffreep = m_ATHLON_K8;
524 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
525 const int x86_inter_unit_moves = ~(m_ATHLON_K8);
526 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_PPRO;
528 /* In case the average insn count for single function invocation is
529 lower than this constant, emit fast (but longer) prologue and
531 #define FAST_PROLOGUE_INSN_COUNT 20
533 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
534 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
535 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
536 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
538 /* Array of the smallest class containing reg number REGNO, indexed by
539 REGNO. Used by REGNO_REG_CLASS in i386.h. */
541 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
544 AREG, DREG, CREG, BREG,
546 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
548 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
549 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
552 /* flags, fpsr, dirflag, frame */
553 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
554 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
556 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
558 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
559 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
560 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
564 /* The "default" register map used in 32bit mode. */
566 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
568 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
569 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
570 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
571 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
572 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
573 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
574 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
577 static int const x86_64_int_parameter_registers[6] =
579 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
580 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
/* Registers (gcc regnos) used to return integer values in the x86-64
   ABI. NOTE(review): in this file's regno scheme (see
   x86_64_int_parameter_registers and the DWARF comment below: regno 1
   is %edx/RDX, regno 5 is %edi/RDI), the original annotation on the
   second entry said RDI; it should be RDX. */
583 static int x86_64_int_return_registers[4] =
585 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
588 /* The "default" register map used in 64bit mode. */
589 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
591 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
592 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
593 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
594 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
595 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
596 8,9,10,11,12,13,14,15, /* extended integer registers */
597 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
600 /* Define the register numbers to be used in Dwarf debugging information.
601 The SVR4 reference port C compiler uses the following register numbers
602 in its Dwarf output code:
603 0 for %eax (gcc regno = 0)
604 1 for %ecx (gcc regno = 2)
605 2 for %edx (gcc regno = 1)
606 3 for %ebx (gcc regno = 3)
607 4 for %esp (gcc regno = 7)
608 5 for %ebp (gcc regno = 6)
609 6 for %esi (gcc regno = 4)
610 7 for %edi (gcc regno = 5)
611 The following three DWARF register numbers are never generated by
612 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
613 believes these numbers have these meanings.
614 8 for %eip (no gcc equivalent)
615 9 for %eflags (gcc regno = 17)
616 10 for %trapno (no gcc equivalent)
617 It is not at all clear how we should number the FP stack registers
618 for the x86 architecture. If the version of SDB on x86/svr4 were
619 a bit less brain dead with respect to floating-point then we would
620 have a precedent to follow with respect to DWARF register numbers
621 for x86 FP registers, but the SDB on x86/svr4 is so completely
622 broken with respect to FP registers that it is hardly worth thinking
623 of it as something to strive for compatibility with.
624 The version of x86/svr4 SDB I have at the moment does (partially)
625 seem to believe that DWARF register number 11 is associated with
626 the x86 register %st(0), but that's about all. Higher DWARF
627 register numbers don't seem to be associated with anything in
628 particular, and even for DWARF regno 11, SDB only seems to under-
629 stand that it should say that a variable lives in %st(0) (when
630 asked via an `=' command) if we said it was in DWARF regno 11,
631 but SDB still prints garbage when asked for the value of the
632 variable in question (via a `/' command).
633 (Also note that the labels SDB prints for various FP stack regs
634 when doing an `x' command are all wrong.)
635 Note that these problems generally don't affect the native SVR4
636 C compiler because it doesn't allow the use of -O with -g and
637 because when it is *not* optimizing, it allocates a memory
638 location for each floating-point variable, and the memory
639 location is what gets described in the DWARF AT_location
640 attribute for the variable in question.
641 Regardless of the severe mental illness of the x86/svr4 SDB, we
642 do something sensible here and we use the following DWARF
643 register numbers. Note that these are all stack-top-relative
645 11 for %st(0) (gcc regno = 8)
646 12 for %st(1) (gcc regno = 9)
647 13 for %st(2) (gcc regno = 10)
648 14 for %st(3) (gcc regno = 11)
649 15 for %st(4) (gcc regno = 12)
650 16 for %st(5) (gcc regno = 13)
651 17 for %st(6) (gcc regno = 14)
652 18 for %st(7) (gcc regno = 15)
654 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
656 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
657 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
658 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
659 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
660 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
661 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
662 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
665 /* Test and compare insns in i386.md store the information needed to
666 generate branch and scc insns here. */
668 rtx ix86_compare_op0 = NULL_RTX;
669 rtx ix86_compare_op1 = NULL_RTX;
671 #define MAX_386_STACK_LOCALS 3
672 /* Size of the register save area. */
673 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
675 /* Define the structure for the machine field in struct function. */
677 struct stack_local_entry GTY(())
682 struct stack_local_entry *next;
685 /* Structure describing stack frame layout.
686 Stack grows downward:
692 saved frame pointer if frame_pointer_needed
693 <- HARD_FRAME_POINTER
699 > to_allocate <- FRAME_POINTER
711 int outgoing_arguments_size;
714 HOST_WIDE_INT to_allocate;
715 /* The offsets relative to ARG_POINTER. */
716 HOST_WIDE_INT frame_pointer_offset;
717 HOST_WIDE_INT hard_frame_pointer_offset;
718 HOST_WIDE_INT stack_pointer_offset;
720 /* When save_regs_using_mov is set, emit prologue using
721 move instead of push instructions. */
722 bool save_regs_using_mov;
725 /* Used to enable/disable debugging features. */
726 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
727 /* Code model option as passed by user. */
728 const char *ix86_cmodel_string;
730 enum cmodel ix86_cmodel;
732 const char *ix86_asm_string;
733 enum asm_dialect ix86_asm_dialect = ASM_ATT;
735 const char *ix86_tls_dialect_string;
736 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
738 /* Which unit we are generating floating point math for. */
739 enum fpmath_unit ix86_fpmath;
741 /* Which cpu are we scheduling for. */
742 enum processor_type ix86_tune;
743 /* Which instruction set architecture to use. */
744 enum processor_type ix86_arch;
746 /* Strings to hold which cpu and instruction set architecture to use. */
747 const char *ix86_tune_string; /* for -mtune=<xxx> */
748 const char *ix86_arch_string; /* for -march=<xxx> */
749 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
751 /* # of registers to use to pass arguments. */
752 const char *ix86_regparm_string;
754 /* true if sse prefetch instruction is not NOOP. */
755 int x86_prefetch_sse;
757 /* ix86_regparm_string as a number */
760 /* Alignment to use for loops and jumps: */
762 /* Power of two alignment for loops. */
763 const char *ix86_align_loops_string;
765 /* Power of two alignment for non-loop jumps. */
766 const char *ix86_align_jumps_string;
768 /* Power of two alignment for stack boundary in bytes. */
769 const char *ix86_preferred_stack_boundary_string;
771 /* Preferred alignment for stack boundary in bits. */
772 int ix86_preferred_stack_boundary;
774 /* Values 1-5: see jump.c */
775 int ix86_branch_cost;
776 const char *ix86_branch_cost_string;
778 /* Power of two alignment for functions. */
779 const char *ix86_align_funcs_string;
781 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
782 static char internal_label_prefix[16];
783 static int internal_label_prefix_len;
785 static int local_symbolic_operand (rtx, enum machine_mode);
786 static int tls_symbolic_operand_1 (rtx, enum tls_model);
787 static void output_pic_addr_const (FILE *, rtx, int);
788 static void put_condition_code (enum rtx_code, enum machine_mode,
790 static const char *get_some_local_dynamic_name (void);
791 static int get_some_local_dynamic_name_1 (rtx *, void *);
792 static rtx maybe_get_pool_constant (rtx);
793 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
794 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
796 static rtx get_thread_pointer (int);
797 static rtx legitimize_tls_address (rtx, enum tls_model, int);
798 static void get_pc_thunk_name (char [32], unsigned int);
799 static rtx gen_push (rtx);
800 static int memory_address_length (rtx addr);
801 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
802 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
803 static enum attr_ppro_uops ix86_safe_ppro_uops (rtx);
804 static void ix86_dump_ppro_packet (FILE *);
805 static void ix86_reorder_insn (rtx *, rtx *);
806 static struct machine_function * ix86_init_machine_status (void);
807 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
808 static int ix86_nsaved_regs (void);
809 static void ix86_emit_save_regs (void);
810 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
811 static void ix86_emit_restore_regs_using_mov (rtx, int, int);
812 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
813 static void ix86_set_move_mem_attrs_1 (rtx, rtx, rtx, rtx, rtx);
814 static void ix86_sched_reorder_ppro (rtx *, rtx *);
815 static HOST_WIDE_INT ix86_GOT_alias_set (void);
816 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
817 static rtx ix86_expand_aligntest (rtx, int);
818 static void ix86_expand_strlensi_unroll_1 (rtx, rtx);
819 static int ix86_issue_rate (void);
820 static int ix86_adjust_cost (rtx, rtx, rtx, int);
821 static void ix86_sched_init (FILE *, int, int);
/* Forward declarations and small tables for the i386 back end.
   NOTE(review): this chunk is an elided listing -- the number starting
   each line is the original i386.c line number, and runs of intermediate
   lines are missing (the numbering jumps).  Original text is preserved
   verbatim; only comments are added.  */
822 static int ix86_sched_reorder (FILE *, int, rtx *, int *, int);
823 static int ix86_variable_issue (FILE *, int, rtx, int);
824 static int ia32_use_dfa_pipeline_interface (void);
825 static int ia32_multipass_dfa_lookahead (void);
826 static void ix86_init_mmx_sse_builtins (void);
827 static rtx x86_this_parameter (tree);
828 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
829 HOST_WIDE_INT, tree);
830 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
831 static void x86_file_start (void);
832 static void ix86_reorg (void);
833 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
834 static tree ix86_build_builtin_va_list (void);
/* NOTE(review): the next two lines are members of struct ix86_address;
   the enclosing "struct ix86_address {" (original lines 835-837) was lost
   in the elision -- confirm against the full file.  */
838 rtx base, index, disp;
840 enum ix86_address_seg { SEG_DEFAULT, SEG_FS, SEG_GS } seg;
843 static int ix86_decompose_address (rtx, struct ix86_address *);
844 static int ix86_address_cost (rtx);
845 static bool ix86_cannot_force_const_mem (rtx);
846 static rtx ix86_delegitimize_address (rtx);
848 struct builtin_description;
849 static rtx ix86_expand_sse_comi (const struct builtin_description *,
851 static rtx ix86_expand_sse_compare (const struct builtin_description *,
853 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
854 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
855 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
856 static rtx ix86_expand_store_builtin (enum insn_code, tree);
857 static rtx safe_vector_operand (rtx, enum machine_mode);
858 static enum rtx_code ix86_fp_compare_code_to_integer (enum rtx_code);
859 static void ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *,
860 enum rtx_code *, enum rtx_code *);
861 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
862 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
863 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
864 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
865 static int ix86_fp_comparison_cost (enum rtx_code code);
866 static unsigned int ix86_select_alt_pic_regnum (void);
867 static int ix86_save_reg (unsigned int, int);
868 static void ix86_compute_frame_layout (struct ix86_frame *);
869 static int ix86_comp_type_attributes (tree, tree);
870 static int ix86_function_regparm (tree, tree);
871 const struct attribute_spec ix86_attribute_table[];
872 static bool ix86_function_ok_for_sibcall (tree, tree);
873 static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
874 static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
875 static int ix86_value_regno (enum machine_mode);
876 static bool contains_128bit_aligned_vector_p (tree);
877 static bool ix86_ms_bitfield_layout_p (tree);
878 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
879 static int extended_reg_mentioned_1 (rtx *, void *);
880 static bool ix86_rtx_costs (rtx, int, int, int *);
881 static int min_insn_size (rtx);
882 static void k8_avoid_jump_misspredicts (void);
884 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
885 static void ix86_svr3_asm_out_constructor (rtx, int);
888 /* Register class used for passing given 64bit part of the argument.
889 These represent classes as documented by the PS ABI, with the exception
890 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
891 use SF or DFmode move instead of DImode to avoid reformatting penalties.
893 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
894 whenever possible (upper half does contain padding).
896 enum x86_64_reg_class
899 X86_64_INTEGER_CLASS,
900 X86_64_INTEGERSI_CLASS,
/* NOTE(review): remaining enumerators (original lines 901-908) are
   elided here; the name table below suggests sse/sseSF/sseDF/sseup/
   x87/x87up/memory classes -- confirm against the full file.  */
909 static const char * const x86_64_reg_class_name[] =
910 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
912 #define MAX_CLASSES 4
913 static int classify_argument (enum machine_mode, tree,
914 enum x86_64_reg_class [MAX_CLASSES], int);
915 static int examine_argument (enum machine_mode, tree, int, int *, int *);
916 static rtx construct_container (enum machine_mode, tree, int, int, int,
918 static enum x86_64_reg_class merge_classes (enum x86_64_reg_class,
919 enum x86_64_reg_class);
921 /* Table of constants used by fldpi, fldln2, etc... */
922 static REAL_VALUE_TYPE ext_80387_constants_table [5];
923 static bool ext_80387_constants_init = 0;
924 static void init_ext_80387_constants (void);
/* Wiring of the i386 implementations into the target hook vector.
   Each #undef/#define pair overrides a default from target-def.h; the
   values are the functions/macros declared above.  NOTE(review): elided
   listing -- leading numbers are original line numbers and some lines
   (e.g. closing #endif's) are missing.  */
926 /* Initialize the GCC target structure. */
927 #undef TARGET_ATTRIBUTE_TABLE
928 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
929 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
930 # undef TARGET_MERGE_DECL_ATTRIBUTES
931 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
934 #undef TARGET_COMP_TYPE_ATTRIBUTES
935 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
937 #undef TARGET_INIT_BUILTINS
938 #define TARGET_INIT_BUILTINS ix86_init_builtins
940 #undef TARGET_EXPAND_BUILTIN
941 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
943 #undef TARGET_ASM_FUNCTION_EPILOGUE
944 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
946 #undef TARGET_ASM_OPEN_PAREN
947 #define TARGET_ASM_OPEN_PAREN ""
948 #undef TARGET_ASM_CLOSE_PAREN
949 #define TARGET_ASM_CLOSE_PAREN ""
951 #undef TARGET_ASM_ALIGNED_HI_OP
952 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
953 #undef TARGET_ASM_ALIGNED_SI_OP
954 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
956 #undef TARGET_ASM_ALIGNED_DI_OP
957 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
/* Unaligned emitters just reuse the aligned ones.  */
960 #undef TARGET_ASM_UNALIGNED_HI_OP
961 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
962 #undef TARGET_ASM_UNALIGNED_SI_OP
963 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
964 #undef TARGET_ASM_UNALIGNED_DI_OP
965 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
/* Scheduler hooks.  */
967 #undef TARGET_SCHED_ADJUST_COST
968 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
969 #undef TARGET_SCHED_ISSUE_RATE
970 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
971 #undef TARGET_SCHED_VARIABLE_ISSUE
972 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
973 #undef TARGET_SCHED_INIT
974 #define TARGET_SCHED_INIT ix86_sched_init
975 #undef TARGET_SCHED_REORDER
976 #define TARGET_SCHED_REORDER ix86_sched_reorder
977 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
978 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
979 ia32_use_dfa_pipeline_interface
980 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
981 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
982 ia32_multipass_dfa_lookahead
984 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
985 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
988 #undef TARGET_HAVE_TLS
989 #define TARGET_HAVE_TLS true
991 #undef TARGET_CANNOT_FORCE_CONST_MEM
992 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
994 #undef TARGET_DELEGITIMIZE_ADDRESS
995 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
997 #undef TARGET_MS_BITFIELD_LAYOUT_P
998 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1000 #undef TARGET_ASM_OUTPUT_MI_THUNK
1001 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1002 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1003 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1005 #undef TARGET_ASM_FILE_START
1006 #define TARGET_ASM_FILE_START x86_file_start
1008 #undef TARGET_RTX_COSTS
1009 #define TARGET_RTX_COSTS ix86_rtx_costs
1010 #undef TARGET_ADDRESS_COST
1011 #define TARGET_ADDRESS_COST ix86_address_cost
1013 #undef TARGET_MACHINE_DEPENDENT_REORG
1014 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1016 #undef TARGET_BUILD_BUILTIN_VA_LIST
1017 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
/* The one and only definition of the target hook vector, built from the
   TARGET_* macros above.  */
1019 struct gcc_target targetm = TARGET_INITIALIZER;
/* NOTE(review): the tail of this comment (original line 1022) is elided.  */
1021 /* The svr4 ABI for the i386 says that records and unions are returned
1023 #ifndef DEFAULT_PCC_STRUCT_RETURN
1024 #define DEFAULT_PCC_STRUCT_RETURN 1
1027 /* Sometimes certain combinations of command options do not make
1028 sense on a particular target machine. You can define a macro
1029 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1030 defined, is executed once just after all the command options have
1033 Don't use this macro to turn on various extra optimizations for
1034 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
/* Validate and reconcile all i386-specific command-line options, pick the
   arch/tune CPUs, and fill in the global tuning state (ix86_cost, align_*,
   target_flags, ...).  NOTE(review): elided listing -- leading numbers are
   original line numbers; braces/else-branches between numbering gaps are
   missing.  */
1037 override_options (void)
1040 /* Comes from final.c -- no real reason to change it. */
1041 #define MAX_CODE_ALIGN 16
1045 const struct processor_costs *cost; /* Processor costs */
1046 const int target_enable; /* Target flags to enable. */
1047 const int target_disable; /* Target flags to disable. */
1048 const int align_loop; /* Default alignments. */
1049 const int align_loop_max_skip;
1050 const int align_jump;
1051 const int align_jump_max_skip;
1052 const int align_func;
/* Per-processor default costs and alignments, indexed by processor_type.  */
1054 const processor_target_table[PROCESSOR_max] =
1056 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1057 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1058 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1059 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1060 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1061 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1062 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1063 {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
1066 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1069 const char *const name; /* processor name or nickname. */
1070 const enum processor_type processor;
1071 const enum pta_flags
1076 PTA_PREFETCH_SSE = 8,
/* Map -march=/-mtune= names to a processor and its ISA feature flags.  */
1082 const processor_alias_table[] =
1084 {"i386", PROCESSOR_I386, 0},
1085 {"i486", PROCESSOR_I486, 0},
1086 {"i586", PROCESSOR_PENTIUM, 0},
1087 {"pentium", PROCESSOR_PENTIUM, 0},
1088 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1089 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1090 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1091 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1092 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1093 {"i686", PROCESSOR_PENTIUMPRO, 0},
1094 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1095 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1096 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1097 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
1098 PTA_MMX | PTA_PREFETCH_SSE},
1099 {"k6", PROCESSOR_K6, PTA_MMX},
1100 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1101 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1102 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1104 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1105 | PTA_3DNOW | PTA_3DNOW_A},
1106 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1107 | PTA_3DNOW_A | PTA_SSE},
1108 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1109 | PTA_3DNOW_A | PTA_SSE},
1110 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1111 | PTA_3DNOW_A | PTA_SSE},
1112 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1113 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1116 int const pta_size = ARRAY_SIZE (processor_alias_table);
1118 /* Set the default values for switches whose default depends on TARGET_64BIT
1119 in case they weren't overwritten by command line options. */
1122 if (flag_omit_frame_pointer == 2)
1123 flag_omit_frame_pointer = 1;
1124 if (flag_asynchronous_unwind_tables == 2)
1125 flag_asynchronous_unwind_tables = 1;
1126 if (flag_pcc_struct_return == 2)
1127 flag_pcc_struct_return = 0;
1131 if (flag_omit_frame_pointer == 2)
1132 flag_omit_frame_pointer = 0;
1133 if (flag_asynchronous_unwind_tables == 2)
1134 flag_asynchronous_unwind_tables = 0;
1135 if (flag_pcc_struct_return == 2)
1136 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1139 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1140 SUBTARGET_OVERRIDE_OPTIONS;
/* -mtune defaults to -march, then to the configured default CPU.  */
1143 if (!ix86_tune_string && ix86_arch_string)
1144 ix86_tune_string = ix86_arch_string;
1145 if (!ix86_tune_string)
1146 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1147 if (!ix86_arch_string)
1148 ix86_arch_string = TARGET_64BIT ? "k8" : "i386";
1150 if (ix86_cmodel_string != 0)
1152 if (!strcmp (ix86_cmodel_string, "small"))
1153 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1155 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1156 else if (!strcmp (ix86_cmodel_string, "32"))
1157 ix86_cmodel = CM_32;
1158 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1159 ix86_cmodel = CM_KERNEL;
1160 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1161 ix86_cmodel = CM_MEDIUM;
1162 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1163 ix86_cmodel = CM_LARGE;
1165 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1169 ix86_cmodel = CM_32;
1171 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1173 if (ix86_asm_string != 0)
1175 if (!strcmp (ix86_asm_string, "intel"))
1176 ix86_asm_dialect = ASM_INTEL;
1177 else if (!strcmp (ix86_asm_string, "att"))
1178 ix86_asm_dialect = ASM_ATT;
1180 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1182 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1183 error ("code model `%s' not supported in the %s bit mode",
1184 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1185 if (ix86_cmodel == CM_LARGE)
1186 sorry ("code model `large' not supported yet");
1187 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1188 sorry ("%i-bit mode not compiled in",
1189 (target_flags & MASK_64BIT) ? 64 : 32);
/* Resolve -march= and enable the ISA extensions it implies (unless the
   user set the corresponding -m flag explicitly).  */
1191 for (i = 0; i < pta_size; i++)
1192 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1194 ix86_arch = processor_alias_table[i].processor;
1195 /* Default cpu tuning to the architecture. */
1196 ix86_tune = ix86_arch;
1197 if (processor_alias_table[i].flags & PTA_MMX
1198 && !(target_flags_explicit & MASK_MMX))
1199 target_flags |= MASK_MMX;
1200 if (processor_alias_table[i].flags & PTA_3DNOW
1201 && !(target_flags_explicit & MASK_3DNOW))
1202 target_flags |= MASK_3DNOW;
1203 if (processor_alias_table[i].flags & PTA_3DNOW_A
1204 && !(target_flags_explicit & MASK_3DNOW_A))
1205 target_flags |= MASK_3DNOW_A;
1206 if (processor_alias_table[i].flags & PTA_SSE
1207 && !(target_flags_explicit & MASK_SSE))
1208 target_flags |= MASK_SSE;
1209 if (processor_alias_table[i].flags & PTA_SSE2
1210 && !(target_flags_explicit & MASK_SSE2))
1211 target_flags |= MASK_SSE2;
1212 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1213 x86_prefetch_sse = true;
1214 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1215 error ("CPU you selected does not support x86-64 instruction set");
1220 error ("bad value (%s) for -march= switch", ix86_arch_string);
/* Resolve -mtune= the same way.  */
1222 for (i = 0; i < pta_size; i++)
1223 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1225 ix86_tune = processor_alias_table[i].processor;
1226 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1227 error ("CPU you selected does not support x86-64 instruction set");
1230 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1231 x86_prefetch_sse = true;
1233 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1236 ix86_cost = &size_cost;
1238 ix86_cost = processor_target_table[ix86_tune].cost;
1239 target_flags |= processor_target_table[ix86_tune].target_enable;
1240 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1242 /* Arrange to set up i386_stack_locals for all functions. */
1243 init_machine_status = ix86_init_machine_status;
1245 /* Validate -mregparm= value. */
1246 if (ix86_regparm_string)
1248 i = atoi (ix86_regparm_string);
1249 if (i < 0 || i > REGPARM_MAX)
1250 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1256 ix86_regparm = REGPARM_MAX;
1258 /* If the user has provided any of the -malign-* options,
1259 warn and use that value only if -falign-* is not set.
1260 Remove this code in GCC 3.2 or later. */
1261 if (ix86_align_loops_string)
1263 warning ("-malign-loops is obsolete, use -falign-loops");
1264 if (align_loops == 0)
1266 i = atoi (ix86_align_loops_string);
1267 if (i < 0 || i > MAX_CODE_ALIGN)
1268 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1270 align_loops = 1 << i;
1274 if (ix86_align_jumps_string)
1276 warning ("-malign-jumps is obsolete, use -falign-jumps");
1277 if (align_jumps == 0)
1279 i = atoi (ix86_align_jumps_string);
1280 if (i < 0 || i > MAX_CODE_ALIGN)
/* Fixed: this diagnostic previously named "-malign-loops" -- a copy-paste
   slip from the loop-alignment case above; it validates -malign-jumps.  */
1281 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1283 align_jumps = 1 << i;
1287 if (ix86_align_funcs_string)
1289 warning ("-malign-functions is obsolete, use -falign-functions");
1290 if (align_functions == 0)
1292 i = atoi (ix86_align_funcs_string)
1293 if (i < 0 || i > MAX_CODE_ALIGN)
/* Fixed: likewise, this diagnostic previously named "-malign-loops"; it
   validates -malign-functions.  */
1294 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1296 align_functions = 1 << i;
1300 /* Default align_* from the processor table. */
1301 if (align_loops == 0)
1303 align_loops = processor_target_table[ix86_tune].align_loop;
1304 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1306 if (align_jumps == 0)
1308 align_jumps = processor_target_table[ix86_tune].align_jump;
1309 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1311 if (align_functions == 0)
1313 align_functions = processor_target_table[ix86_tune].align_func;
1316 /* Validate -mpreferred-stack-boundary= value, or provide default.
1317 The default of 128 bits is for Pentium III's SSE __m128, but we
1318 don't want additional code to keep the stack aligned when
1319 optimizing for code size. */
1320 ix86_preferred_stack_boundary = (optimize_size
1321 ? TARGET_64BIT ? 128 : 32
1323 if (ix86_preferred_stack_boundary_string)
1325 i = atoi (ix86_preferred_stack_boundary_string);
1326 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1327 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1328 TARGET_64BIT ? 4 : 2);
1330 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1333 /* Validate -mbranch-cost= value, or provide default. */
1334 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1335 if (ix86_branch_cost_string)
1337 i = atoi (ix86_branch_cost_string);
1339 error ("-mbranch-cost=%d is not between 0 and 5", i);
1341 ix86_branch_cost = i;
1344 if (ix86_tls_dialect_string)
1346 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1347 ix86_tls_dialect = TLS_DIALECT_GNU;
1348 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1349 ix86_tls_dialect = TLS_DIALECT_SUN;
1351 error ("bad value (%s) for -mtls-dialect= switch",
1352 ix86_tls_dialect_string);
1355 /* Keep nonleaf frame pointers. */
1356 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1357 flag_omit_frame_pointer = 1;
1359 /* If we're doing fast math, we don't care about comparison order
1360 wrt NaNs. This lets us use a shorter comparison sequence. */
1361 if (flag_unsafe_math_optimizations)
1362 target_flags &= ~MASK_IEEE_FP;
1364 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1365 since the insns won't need emulation. */
1366 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1367 target_flags &= ~MASK_NO_FANCY_MATH_387;
1369 /* Turn on SSE2 builtins for -mpni. */
1371 target_flags |= MASK_SSE2;
1373 /* Turn on SSE builtins for -msse2. */
1375 target_flags |= MASK_SSE;
1379 if (TARGET_ALIGN_DOUBLE)
1380 error ("-malign-double makes no sense in the 64bit mode");
1382 error ("-mrtd calling convention not supported in the 64bit mode");
1383 /* Enable by default the SSE and MMX builtins. */
1384 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1385 ix86_fpmath = FPMATH_SSE;
1389 ix86_fpmath = FPMATH_387;
1390 /* i386 ABI does not specify red zone. It still makes sense to use it
1391 when the programmer takes care to keep the stack from being destroyed. */
1392 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1393 target_flags |= MASK_NO_RED_ZONE;
1396 if (ix86_fpmath_string != 0)
1398 if (! strcmp (ix86_fpmath_string, "387"))
1399 ix86_fpmath = FPMATH_387;
1400 else if (! strcmp (ix86_fpmath_string, "sse"))
1404 warning ("SSE instruction set disabled, using 387 arithmetics");
1405 ix86_fpmath = FPMATH_387;
1408 ix86_fpmath = FPMATH_SSE;
1410 else if (! strcmp (ix86_fpmath_string, "387,sse")
1411 || ! strcmp (ix86_fpmath_string, "sse,387"))
1415 warning ("SSE instruction set disabled, using 387 arithmetics");
1416 ix86_fpmath = FPMATH_387;
1418 else if (!TARGET_80387)
1420 warning ("387 instruction set disabled, using SSE arithmetics");
1421 ix86_fpmath = FPMATH_SSE;
1424 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1427 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1430 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1434 target_flags |= MASK_MMX;
1435 x86_prefetch_sse = true;
1438 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1441 target_flags |= MASK_MMX;
1442 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1443 extensions it adds. */
1444 if (x86_3dnow_a & (1 << ix86_arch))
1445 target_flags |= MASK_3DNOW_A;
1447 if ((x86_accumulate_outgoing_args & TUNEMASK)
1448 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1450 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1452 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1455 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1456 p = strchr (internal_label_prefix, 'X');
1457 internal_label_prefix_len = p - internal_label_prefix;
/* Per-optimization-level option defaults for i386; runs before
   override_options, which later resolves the '2' sentinels below once
   TARGET_64BIT is known.  NOTE(review): elided listing; enclosing braces
   and some conditions are missing from this view.  */
1463 optimization_options (int level, int size ATTRIBUTE_UNUSED)
1465 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1466 make the problem with not enough registers even worse. */
1467 #ifdef INSN_SCHEDULING
1469 flag_schedule_insns = 0;
1472 /* The default values of these switches depend on the TARGET_64BIT
1473 that is not known at this moment. Mark these values with 2 and
1474 let the user override these. In case there is no command line option
1475 specifying them, we will set the defaults in override_options. */
1477 flag_omit_frame_pointer = 2;
1478 flag_pcc_struct_return = 2;
1479 flag_asynchronous_unwind_tables = 2;
1482 /* Table of valid machine attributes. */
1483 const struct attribute_spec ix86_attribute_table[] =
1485 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1486 /* Stdcall attribute says callee is responsible for popping arguments
1487 if they are not variable. */
1488 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1489 /* Fastcall attribute says callee is responsible for popping arguments
1490 if they are not variable. */
1491 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1492 /* Cdecl attribute says the callee is a normal C declaration */
1493 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1494 /* Regparm attribute specifies how many integer arguments are to be
1495 passed in registers. */
1496 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1497 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1498 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1499 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1500 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1502 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1503 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
/* NULL entry terminates the table.  NOTE(review): the closing "};" and
   the #endif for the dllimport section are elided from this view.  */
1504 { NULL, 0, 0, false, false, false, NULL }
1507 /* Decide whether we can make a sibling call to a function. DECL is the
1508 declaration of the function being targeted by the call and EXP is the
1509 CALL_EXPR representing the call. */
/* NOTE(review): elided listing -- the "static bool" line, braces, and the
   return statements of each early-out are missing from this view.  */
1512 ix86_function_ok_for_sibcall (tree decl, tree exp)
1514 /* If we are generating position-independent code, we cannot sibcall
1515 optimize any indirect call, or a direct call to a global function,
1516 as the PLT requires %ebx be live. */
1517 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1520 /* If we are returning floats on the 80387 register stack, we cannot
1521 make a sibcall from a function that doesn't return a float to a
1522 function that does or, conversely, from a function that does return
1523 a float to a function that doesn't; the necessary stack adjustment
1524 would not be executed. */
1525 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1526 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1529 /* If this call is indirect, we'll need to be able to use a call-clobbered
1530 register for the address of the target function. Make sure that all
1531 such registers are not used for passing parameters. */
1532 if (!decl && !TARGET_64BIT)
1536 /* We're looking at the CALL_EXPR, we need the type of the function. */
1537 type = TREE_OPERAND (exp, 0); /* pointer expression */
1538 type = TREE_TYPE (type); /* pointer type */
1539 type = TREE_TYPE (type); /* function type */
1541 if (ix86_function_regparm (type, NULL) >= 3)
1543 /* ??? Need to count the actual number of registers to be used,
1544 not the possible number of registers. Fix later. */
1549 /* Otherwise okay. That also includes certain types of indirect calls. */
1553 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1554 arguments as in struct attribute_spec.handler. */
/* Rejects the attribute on non-function nodes and diagnoses mutually
   incompatible calling-convention attribute combinations.  NOTE(review):
   elided listing; braces and some branches are missing from this view.  */
1556 ix86_handle_cdecl_attribute (tree *node, tree name,
1557 tree args ATTRIBUTE_UNUSED,
1558 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1560 if (TREE_CODE (*node) != FUNCTION_TYPE
1561 && TREE_CODE (*node) != METHOD_TYPE
1562 && TREE_CODE (*node) != FIELD_DECL
1563 && TREE_CODE (*node) != TYPE_DECL)
1565 warning ("`%s' attribute only applies to functions",
1566 IDENTIFIER_POINTER (name));
1567 *no_add_attrs = true;
1571 if (is_attribute_p ("fastcall", name))
1573 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1575 error ("fastcall and stdcall attributes are not compatible");
1577 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1579 error ("fastcall and regparm attributes are not compatible");
1582 else if (is_attribute_p ("stdcall", name))
1584 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1586 error ("fastcall and stdcall attributes are not compatible");
/* On TARGET_64BIT these attributes are ignored (warning path).  */
1593 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1594 *no_add_attrs = true;
1600 /* Handle a "regparm" attribute;
1601 arguments as in struct attribute_spec.handler. */
/* Validates that the argument is an integer constant in [0, REGPARM_MAX]
   and that regparm is not combined with fastcall.  NOTE(review): elided
   listing; braces and return statements are missing from this view.  */
1603 ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1604 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1606 if (TREE_CODE (*node) != FUNCTION_TYPE
1607 && TREE_CODE (*node) != METHOD_TYPE
1608 && TREE_CODE (*node) != FIELD_DECL
1609 && TREE_CODE (*node) != TYPE_DECL)
1611 warning ("`%s' attribute only applies to functions",
1612 IDENTIFIER_POINTER (name));
1613 *no_add_attrs = true;
1619 cst = TREE_VALUE (args);
1620 if (TREE_CODE (cst) != INTEGER_CST)
1622 warning ("`%s' attribute requires an integer constant argument",
1623 IDENTIFIER_POINTER (name));
1624 *no_add_attrs = true;
1626 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1628 warning ("argument to `%s' attribute larger than %d",
1629 IDENTIFIER_POINTER (name), REGPARM_MAX);
1630 *no_add_attrs = true;
1633 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1635 error ("fastcall and regparm attributes are not compatible");
1642 /* Return 0 if the attributes for two types are incompatible, 1 if they
1643 are compatible, and 2 if they are nearly compatible (which causes a
1644 warning to be generated). */
/* NOTE(review): elided listing -- the return statements for each case
   are missing from this view.  */
1647 ix86_comp_type_attributes (tree type1, tree type2)
1649 /* Check for mismatch of non-default calling convention. */
1650 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1652 if (TREE_CODE (type1) != FUNCTION_TYPE
1655 /* Check for mismatched fastcall types */
1656 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1657 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1660 /* Check for mismatched return types (cdecl vs stdcall). */
1661 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1662 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1667 /* Return the regparm value for a function with the indicated TYPE and DECL.
1668 DECL may be NULL when calling function indirectly
1669 or considering a libcall. */
/* NOTE(review): elided listing; braces and the final return are missing
   from this view.  */
1672 ix86_function_regparm (tree type, tree decl)
1675 int regparm = ix86_regparm;
1676 bool user_convention = false;
/* An explicit regparm attribute overrides the -mregparm default.  */
1680 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1683 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1684 user_convention = true;
1687 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1690 user_convention = true;
1693 /* Use register calling convention for local functions when possible. */
1694 if (!TARGET_64BIT && !user_convention && decl
1695 && flag_unit_at_a_time && !profile_flag)
1697 struct cgraph_local_info *i = cgraph_local_info (decl);
1700 /* We can't use regparm(3) for nested functions as these use
1701 static chain pointer in third argument. */
1702 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1712 /* Return true if EAX is live at the start of the function. Used by
1713 ix86_expand_prologue to determine if we need special help before
1714 calling allocate_stack_worker. */
1717 ix86_eax_live_at_start_p (void)
1719 /* Cheat. Don't bother working forward from ix86_function_regparm
1720 to the function type to whether an actual argument is located in
1721 eax. Instead just look at cfg info, which is still close enough
1722 to correct at this point. This gives false positives for broken
1723 functions that might use uninitialized data that happens to be
1724 allocated in eax, but who cares? */
/* Register 0 is EAX in the i386 hard register numbering used here.  */
1725 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1728 /* Value is the number of bytes of arguments automatically
1729 popped when returning from a subroutine call.
1730 FUNDECL is the declaration node of the function (as a tree),
1731 FUNTYPE is the data type of the function (as a tree),
1732 or for a library call it is an identifier node for the subroutine name.
1733 SIZE is the number of bytes of arguments passed on the stack.
1735 On the 80386, the RTD insn may be used to pop them if the number
1736 of args is fixed, but if the number is variable then the caller
1737 must pop them all. RTD can't be used for library calls now
1738 because the library is compiled with the Unix compiler.
1739 Use of RTD is a selectable option, since it is incompatible with
1740 standard Unix calling sequences. If the option is not selected,
1741 the caller must always pop the args.
1743 The attribute stdcall is equivalent to RTD on a per module basis. */
/* NOTE(review): elided listing; several return statements and closing
   braces are missing from this view.  */
1746 ix86_return_pops_args (tree fundecl, tree funtype, int size)
1748 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1750 /* Cdecl functions override -mrtd, and never pop the stack. */
1751 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1753 /* Stdcall and fastcall functions will pop the stack if not variable args. */
1754 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1755 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
/* A fixed (non-varargs) argument list ends with void_type_node.  */
1759 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1760 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1761 == void_type_node)))
1765 /* Lose any fake structure return argument if it is passed on the stack. */
1766 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1769 int nregs = ix86_function_regparm (funtype, fundecl);
1772 return GET_MODE_SIZE (Pmode);
1778 /* Argument support functions. */
1780 /* Return true when register may be used to pass function parameters. */
/* NOTE(review): elided listing; the TARGET_64BIT branch structure and
   returns are partially missing from this view.  */
1782 ix86_function_arg_regno_p (int regno)
1786 return (regno < REGPARM_MAX
1787 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1788 if (SSE_REGNO_P (regno) && TARGET_SSE)
1790 /* RAX is used as hidden argument to va_arg functions. */
1793 for (i = 0; i < REGPARM_MAX; i++)
1794 if (regno == x86_64_int_parameter_registers[i])
1799 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1800 for a call to a function whose data type is FNTYPE.
1801 For a library call, FNTYPE is 0. */
/* NOTE(review): elided listing; braces, the fndecl parameter line, and
   parts of the debug-print branches are missing from this view.  */
1804 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1805 tree fntype, /* tree ptr for function decl */
1806 rtx libname, /* SYMBOL_REF of library name or 0 */
1809 static CUMULATIVE_ARGS zero_cum;
1810 tree param, next_param;
1812 if (TARGET_DEBUG_ARG)
1814 fprintf (stderr, "\ninit_cumulative_args (");
1816 fprintf (stderr, "fntype code = %s, ret code = %s",
1817 tree_code_name[(int) TREE_CODE (fntype)],
1818 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1820 fprintf (stderr, "no fntype");
1823 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1828 /* Set up the number of registers to use for passing arguments. */
1830 cum->nregs = ix86_function_regparm (fntype, fndecl);
1832 cum->nregs = ix86_regparm;
1833 cum->sse_nregs = SSE_REGPARM_MAX;
1834 cum->maybe_vaarg = false;
1836 /* Use ecx and edx registers if function has fastcall attribute */
1837 if (fntype && !TARGET_64BIT)
1839 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1847 /* Determine if this function has variable arguments. This is
1848 indicated by the last argument being 'void_type_node' if there
1849 are no variable arguments. If there are variable arguments, then
1850 we won't pass anything in registers */
1854 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1855 param != 0; param = next_param)
1857 next_param = TREE_CHAIN (param);
1858 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1865 cum->maybe_vaarg = true;
1869 if ((!fntype && !libname)
1870 || (fntype && !TYPE_ARG_TYPES (fntype)))
1871 cum->maybe_vaarg = 1;
1873 if (TARGET_DEBUG_ARG)
1874 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1879 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
1880 of this code is to classify each 8bytes of incoming argument by the register
1881 class and assign registers accordingly. */
1883 /* Return the union class of CLASS1 and CLASS2.
1884 See the x86-64 PS ABI for details. */
/* NOTE(review): elided listing; the return statements for rules #1 and
   #2 are missing from this view.  */
1886 static enum x86_64_reg_class
1887 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
1889 /* Rule #1: If both classes are equal, this is the resulting class. */
1890 if (class1 == class2)
1893 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1895 if (class1 == X86_64_NO_CLASS)
1897 if (class2 == X86_64_NO_CLASS)
1900 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1901 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1902 return X86_64_MEMORY_CLASS;
1904 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1905 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1906 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1907 return X86_64_INTEGERSI_CLASS;
1908 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1909 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1910 return X86_64_INTEGER_CLASS;
1912 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1913 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1914 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1915 return X86_64_MEMORY_CLASS;
1917 /* Rule #6: Otherwise class SSE is used. */
1918 return X86_64_SSE_CLASS;
1921 /* Classify the argument of type TYPE and mode MODE.
1922 CLASSES will be filled by the register class used to pass each word
1923 of the operand. The number of words is returned. In case the parameter
1924 should be passed in memory, 0 is returned. As a special case for zero
1925 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1927 BIT_OFFSET is used internally for handling records and specifies offset
1928 of the offset in bits modulo 256 to avoid overflow cases.
1930 See the x86-64 PS ABI for details.
/* Classify an argument of machine mode MODE / tree type TYPE into psABI
   register classes, one per 8-byte word, written into CLASSES[].  Returns
   the number of words, or 0 when the argument must go in memory (with
   the special case that a zero-sized aggregate yields NO_CLASS and 1).
   BIT_OFFSET is the argument's offset in bits modulo 256, used while
   recursing into record fields.
   NOTE(review): this extract is incomplete -- embedded original line
   numbers jump throughout, so braces, switch headers, `else` arms and
   early returns are missing from view.  Comments annotate only the
   visible logic.  */
1934 classify_argument (enum machine_mode mode, tree type,
1935 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
1937 HOST_WIDE_INT bytes =
1938 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1939 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1941 /* Variable sized entities are always passed/returned in memory. */
1945 if (mode != VOIDmode
1946 && MUST_PASS_IN_STACK (mode, type))
/* --- Aggregate (struct/union/array) classification --- */
1949 if (type && AGGREGATE_TYPE_P (type))
1953 enum x86_64_reg_class subclasses[MAX_CLASSES];
1955 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
/* Start every word as NO_CLASS; fields will be merged in below.  */
1959 for (i = 0; i < words; i++)
1960 classes[i] = X86_64_NO_CLASS;
1962 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1963 signalize memory class, so handle it as special case. */
1966 classes[0] = X86_64_NO_CLASS;
1970 /* Classify each field of record and merge classes. */
1971 if (TREE_CODE (type) == RECORD_TYPE)
1973 /* For classes first merge in the field of the subclasses. */
1974 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1976 tree bases = TYPE_BINFO_BASETYPES (type);
1977 int n_bases = TREE_VEC_LENGTH (bases);
/* Recurse into each C++ base class at its bit offset.  */
1980 for (i = 0; i < n_bases; ++i)
1982 tree binfo = TREE_VEC_ELT (bases, i);
1984 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1985 tree type = BINFO_TYPE (binfo);
1987 num = classify_argument (TYPE_MODE (type),
1989 (offset + bit_offset) % 256);
1992 for (i = 0; i < num; i++)
1994 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1996 merge_classes (subclasses[i], classes[i + pos]);
2000 /* And now merge the fields of structure. */
2001 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2003 if (TREE_CODE (field) == FIELD_DECL)
2007 /* Bitfields are always classified as integer. Handle them
2008 early, since later code would consider them to be
2009 misaligned integers. */
2010 if (DECL_BIT_FIELD (field))
2012 for (i = int_bit_position (field) / 8 / 8;
2013 i < (int_bit_position (field)
2014 + tree_low_cst (DECL_SIZE (field), 0)
2017 merge_classes (X86_64_INTEGER_CLASS,
2022 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2023 TREE_TYPE (field), subclasses,
2024 (int_bit_position (field)
2025 + bit_offset) % 256);
2028 for (i = 0; i < num; i++)
2031 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2033 merge_classes (subclasses[i], classes[i + pos]);
2039 /* Arrays are handled as small records. */
2040 else if (TREE_CODE (type) == ARRAY_TYPE)
2043 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2044 TREE_TYPE (type), subclasses, bit_offset);
2048 /* The partial classes are now full classes. */
2049 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2050 subclasses[0] = X86_64_SSE_CLASS;
2051 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2052 subclasses[0] = X86_64_INTEGER_CLASS;
/* Replicate the element classification across the whole array.  */
2054 for (i = 0; i < words; i++)
2055 classes[i] = subclasses[i % num];
2057 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2058 else if (TREE_CODE (type) == UNION_TYPE
2059 || TREE_CODE (type) == QUAL_UNION_TYPE)
2061 /* For classes first merge in the field of the subclasses. */
2062 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2064 tree bases = TYPE_BINFO_BASETYPES (type);
2065 int n_bases = TREE_VEC_LENGTH (bases);
2068 for (i = 0; i < n_bases; ++i)
2070 tree binfo = TREE_VEC_ELT (bases, i);
2072 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2073 tree type = BINFO_TYPE (binfo);
2075 num = classify_argument (TYPE_MODE (type),
2077 (offset + (bit_offset % 64)) % 256);
2080 for (i = 0; i < num; i++)
2082 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2084 merge_classes (subclasses[i], classes[i + pos]);
/* All union members alias word 0, so merge without a position shift.  */
2088 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2090 if (TREE_CODE (field) == FIELD_DECL)
2093 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2094 TREE_TYPE (field), subclasses,
2098 for (i = 0; i < num; i++)
2099 classes[i] = merge_classes (subclasses[i], classes[i]);
/* Pascal-style SET_TYPE: classified by size in whole integer words.  */
2103 else if (TREE_CODE (type) == SET_TYPE)
2107 classes[0] = X86_64_INTEGERSI_CLASS;
2110 else if (bytes <= 8)
2112 classes[0] = X86_64_INTEGER_CLASS;
2115 else if (bytes <= 12)
2117 classes[0] = X86_64_INTEGER_CLASS;
2118 classes[1] = X86_64_INTEGERSI_CLASS;
2123 classes[0] = X86_64_INTEGER_CLASS;
2124 classes[1] = X86_64_INTEGER_CLASS;
2131 /* Final merger cleanup. */
2132 for (i = 0; i < words; i++)
2134 /* If one class is MEMORY, everything should be passed in
2136 if (classes[i] == X86_64_MEMORY_CLASS)
2139 /* The X86_64_SSEUP_CLASS should be always preceded by
2140 X86_64_SSE_CLASS. */
2141 if (classes[i] == X86_64_SSEUP_CLASS
2142 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2143 classes[i] = X86_64_SSE_CLASS;
2145 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2146 if (classes[i] == X86_64_X87UP_CLASS
2147 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2148 classes[i] = X86_64_SSE_CLASS;
/* --- Scalar (non-aggregate) classification --- */
2153 /* Compute alignment needed. We align all types to natural boundaries with
2154 exception of XFmode that is aligned to 64bits. */
2155 if (mode != VOIDmode && mode != BLKmode)
2157 int mode_alignment = GET_MODE_BITSIZE (mode);
2160 mode_alignment = 128;
2161 else if (mode == XCmode)
2162 mode_alignment = 256;
2163 /* Misaligned fields are always returned in memory. */
2164 if (bit_offset % mode_alignment)
2168 /* Classification of atomic types. */
/* NOTE(review): the switch/case headers for the per-mode dispatch below
   are among the missing lines; each assignment corresponds to one
   machine-mode case (integer, SF/DF/XF float, complex, vector).  */
2178 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2179 classes[0] = X86_64_INTEGERSI_CLASS;
2181 classes[0] = X86_64_INTEGER_CLASS;
2185 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2188 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2189 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2192 if (!(bit_offset % 64))
2193 classes[0] = X86_64_SSESF_CLASS;
2195 classes[0] = X86_64_SSE_CLASS;
2198 classes[0] = X86_64_SSEDF_CLASS;
2201 classes[0] = X86_64_X87_CLASS;
2202 classes[1] = X86_64_X87UP_CLASS;
2208 classes[0] = X86_64_X87_CLASS;
2209 classes[1] = X86_64_X87UP_CLASS;
2210 classes[2] = X86_64_X87_CLASS;
2211 classes[3] = X86_64_X87UP_CLASS;
2214 classes[0] = X86_64_SSEDF_CLASS;
2215 classes[1] = X86_64_SSEDF_CLASS;
2218 classes[0] = X86_64_SSE_CLASS;
2226 classes[0] = X86_64_SSE_CLASS;
2227 classes[1] = X86_64_SSEUP_CLASS;
2242 /* Examine the argument and return set number of register required in each
2243 class. Return 0 iff parameter should be passed in memory. */
/* Count how many integer (*INT_NREGS) and SSE (*SSE_NREGS) registers an
   argument of MODE/TYPE needs, by classifying it and tallying the classes.
   Returns 0 iff the parameter must be passed in memory.  IN_RETURN
   distinguishes return-value from argument classification.
   NOTE(review): extract is missing lines (switch header, increments and
   returns inside the cases); only case labels are visible below.  */
2245 examine_argument (enum machine_mode mode, tree type, int in_return,
2246 int *int_nregs, int *sse_nregs)
2248 enum x86_64_reg_class class[MAX_CLASSES];
2249 int n = classify_argument (mode, type, class, 0);
/* Walk the classes from last to first, bumping the matching counter.  */
2255 for (n--; n >= 0; n--)
2258 case X86_64_INTEGER_CLASS:
2259 case X86_64_INTEGERSI_CLASS:
2262 case X86_64_SSE_CLASS:
2263 case X86_64_SSESF_CLASS:
2264 case X86_64_SSEDF_CLASS:
2267 case X86_64_NO_CLASS:
2268 case X86_64_SSEUP_CLASS:
2270 case X86_64_X87_CLASS:
2271 case X86_64_X87UP_CLASS:
2275 case X86_64_MEMORY_CLASS:
2280 /* Construct container for the argument used by GCC interface. See
2281 FUNCTION_ARG for the detailed description. */
/* Build the RTL describing where an argument/return value of MODE/TYPE
   lives: a single REG for simple cases, or a PARALLEL of (reg, offset)
   EXPR_LISTs when the value is split across integer and SSE registers.
   INTREG points at the integer register numbers to draw from; NINTREGS /
   NSSEREGS bound how many are available.  Returns NULL when the value
   goes in memory (or is zero-sized).
   NOTE(review): extract is missing lines (function's return type, braces,
   switch headers, some return statements); comments describe visible
   logic only.  */
2283 construct_container (enum machine_mode mode, tree type, int in_return,
2284 int nintregs, int nsseregs, const int * intreg,
2287 enum machine_mode tmpmode;
2289 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2290 enum x86_64_reg_class class[MAX_CLASSES];
2294 int needed_sseregs, needed_intregs;
2295 rtx exp[MAX_CLASSES];
2298 n = classify_argument (mode, type, class, 0);
2299 if (TARGET_DEBUG_ARG)
2302 fprintf (stderr, "Memory class\n");
2305 fprintf (stderr, "Classes:");
2306 for (i = 0; i < n; i++)
2308 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2310 fprintf (stderr, "\n");
/* Bail out to memory if the argument doesn't fit the remaining regs.  */
2315 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2317 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2320 /* First construct simple cases. Avoid SCmode, since we want to use
2321 single register to pass this type. */
2322 if (n == 1 && mode != SCmode)
2325 case X86_64_INTEGER_CLASS:
2326 case X86_64_INTEGERSI_CLASS:
2327 return gen_rtx_REG (mode, intreg[0]);
2328 case X86_64_SSE_CLASS:
2329 case X86_64_SSESF_CLASS:
2330 case X86_64_SSEDF_CLASS:
2331 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2332 case X86_64_X87_CLASS:
2333 return gen_rtx_REG (mode, FIRST_STACK_REG);
2334 case X86_64_NO_CLASS:
2335 /* Zero sized array, struct or class. */
/* Two-word special cases that still fit one hard register (or a
   naturally-paired one).  */
2340 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
2341 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2343 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2344 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2345 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2346 && class[1] == X86_64_INTEGER_CLASS
2347 && (mode == CDImode || mode == TImode || mode == TFmode)
2348 && intreg[0] + 1 == intreg[1])
2349 return gen_rtx_REG (mode, intreg[0]);
2351 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2352 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2353 return gen_rtx_REG (XCmode, FIRST_STACK_REG);
2355 /* Otherwise figure out the entries of the PARALLEL. */
2356 for (i = 0; i < n; i++)
2360 case X86_64_NO_CLASS:
2362 case X86_64_INTEGER_CLASS:
2363 case X86_64_INTEGERSI_CLASS:
2364 /* Merge TImodes on aligned occasions here too. */
2365 if (i * 8 + 8 > bytes)
2366 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2367 else if (class[i] == X86_64_INTEGERSI_CLASS)
2371 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2372 if (tmpmode == BLKmode)
2374 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2375 gen_rtx_REG (tmpmode, *intreg),
2379 case X86_64_SSESF_CLASS:
2380 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2381 gen_rtx_REG (SFmode,
2382 SSE_REGNO (sse_regno)),
2386 case X86_64_SSEDF_CLASS:
2387 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2388 gen_rtx_REG (DFmode,
2389 SSE_REGNO (sse_regno)),
2393 case X86_64_SSE_CLASS:
2394 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2398 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2399 gen_rtx_REG (tmpmode,
2400 SSE_REGNO (sse_regno)),
2402 if (tmpmode == TImode)
/* Assemble all collected EXPR_LIST entries into the final PARALLEL.  */
2410 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2411 for (i = 0; i < nexps; i++)
2412 XVECEXP (ret, 0, i) = exp [i];
2416 /* Update the data in CUM to advance over an argument
2417 of mode MODE and data type TYPE.
2418 (TYPE is null for libcalls where that information may not be available.) */
/* Advance the argument cursor CUM past one argument of MODE/TYPE so the
   next argument is assigned the following registers / stack slots.
   NOTE(review): extract is missing lines (braces, the TARGET_64BIT /
   32-bit branch headers, resets of cum fields); comments describe the
   visible logic only.  */
2421 function_arg_advance (CUMULATIVE_ARGS *cum, /* current arg information */
2422 enum machine_mode mode, /* current arg mode */
2423 tree type, /* type of the argument or 0 if lib support */
2424 int named) /* whether or not the argument was named */
2427 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2428 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2430 if (TARGET_DEBUG_ARG)
2432 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2433 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
/* x86-64 path: consume int/SSE registers when the whole argument fits,
   otherwise account for it on the stack.  */
2436 int int_nregs, sse_nregs;
2437 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2438 cum->words += words;
2439 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2441 cum->nregs -= int_nregs;
2442 cum->sse_nregs -= sse_nregs;
2443 cum->regno += int_nregs;
2444 cum->sse_regno += sse_nregs;
2447 cum->words += words;
/* 32-bit path: TImode SSE values take one SSE register ...  */
2451 if (TARGET_SSE && mode == TImode)
2453 cum->sse_words += words;
2454 cum->sse_nregs -= 1;
2455 cum->sse_regno += 1;
2456 if (cum->sse_nregs <= 0)
/* ... everything else consumes integer regparm registers by word.  */
2464 cum->words += words;
2465 cum->nregs -= words;
2466 cum->regno += words;
2468 if (cum->nregs <= 0)
2478 /* Define where to put the arguments to a function.
2479 Value is zero to push the argument on the stack,
2480 or a hard register in which to store the argument.
2482 MODE is the argument's machine mode.
2483 TYPE is the data type of the argument (as a tree).
2484 This is null for libcalls where that information may
2486 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2487 the preceding args and about the function being called.
2488 NAMED is nonzero if this argument is a named parameter
2489 (otherwise it is an extra parameter matching an ellipsis). */
/* Return the RTL (register or PARALLEL) in which to pass the current
   argument, or zero to push it on the stack.  See the comment block above
   for the MODE/TYPE/CUM/NAMED contract.
   NOTE(review): extract is missing lines (return type, braces, the
   TARGET_64BIT / 32-bit dispatch, fastcall register fixups); comments
   describe visible logic only.  */
2492 function_arg (CUMULATIVE_ARGS *cum, /* current arg information */
2493 enum machine_mode mode, /* current arg mode */
2494 tree type, /* type of the argument or 0 if lib support */
2495 int named) /* != 0 for normal args, == 0 for ... args */
2499 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2500 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2502 /* Handle a hidden AL argument containing number of registers for varargs
2503 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2505 if (mode == VOIDmode)
2508 return GEN_INT (cum->maybe_vaarg
2509 ? (cum->sse_nregs < 0
/* x86-64: delegate the real work to construct_container.  */
2517 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2518 &x86_64_int_parameter_registers [cum->regno],
2523 /* For now, pass fp/complex values on the stack. */
2535 if (words <= cum->nregs)
2537 int regno = cum->regno;
2539 /* Fastcall allocates the first two DWORD (SImode) or
2540 smaller arguments to ECX and EDX. */
2543 if (mode == BLKmode || mode == DImode)
2546 /* ECX not EAX is the first allocated register. */
2550 ret = gen_rtx_REG (mode, regno);
2555 ret = gen_rtx_REG (mode, cum->sse_regno);
2559 if (TARGET_DEBUG_ARG)
2562 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2563 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2566 print_simple_rtl (stderr, ret);
2568 fprintf (stderr, ", stack");
2570 fprintf (stderr, " )\n");
2576 /* A C expression that indicates when an argument must be passed by
2577 reference. If nonzero for an argument, a copy of that argument is
2578 made in memory and a pointer to the argument is passed instead of
2579 the argument itself. The pointer is passed in whatever way is
2580 appropriate for passing a pointer to that type. */
2583 function_arg_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2584 enum machine_mode mode ATTRIBUTE_UNUSED,
2585 tree type, int named ATTRIBUTE_UNUSED)
2590 if (type && int_size_in_bytes (type) == -1)
2592 if (TARGET_DEBUG_ARG)
2593 fprintf (stderr, "function_arg_pass_by_reference\n");
2600 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
/* Return true when TYPE is, or recursively contains, a vector type that
   requires 128-bit alignment for 32-bit argument passing.
   NOTE(review): extract is missing lines (return type, braces, return
   statements, the non-aggregate fallthrough); comments describe visible
   logic only.  */
2603 contains_128bit_aligned_vector_p (tree type)
2605 enum machine_mode mode = TYPE_MODE (type);
/* A bare SSE-mode vector (unless the user forced lower alignment)
   qualifies directly.  */
2606 if (SSE_REG_MODE_P (mode)
2607 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2609 if (TYPE_ALIGN (type) < 128)
2612 if (AGGREGATE_TYPE_P (type))
2614 /* Walk the aggregates recursively. */
2615 if (TREE_CODE (type) == RECORD_TYPE
2616 || TREE_CODE (type) == UNION_TYPE
2617 || TREE_CODE (type) == QUAL_UNION_TYPE)
2621 if (TYPE_BINFO (type) != NULL
2622 && TYPE_BINFO_BASETYPES (type) != NULL)
2624 tree bases = TYPE_BINFO_BASETYPES (type);
2625 int n_bases = TREE_VEC_LENGTH (bases);
/* Check every C++ base class.  */
2628 for (i = 0; i < n_bases; ++i)
2630 tree binfo = TREE_VEC_ELT (bases, i);
2631 tree type = BINFO_TYPE (binfo);
2633 if (contains_128bit_aligned_vector_p (type))
2637 /* And now merge the fields of structure. */
2638 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2640 if (TREE_CODE (field) == FIELD_DECL
2641 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2645 /* Just for use if some languages passes arrays by value. */
2646 else if (TREE_CODE (type) == ARRAY_TYPE)
2648 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2657 /* Gives the alignment boundary, in bits, of an argument with the
2658 specified mode and type. */
/* Return the alignment boundary, in bits, for an argument of MODE/TYPE.
   NOTE(review): extract is missing lines (return type, braces, the
   TARGET_64BIT early return, the final return of align); comments
   describe visible logic only.  */
2661 ix86_function_arg_boundary (enum machine_mode mode, tree type)
/* Start from the type's (or mode's) natural alignment, floored at
   PARM_BOUNDARY.  */
2665 align = TYPE_ALIGN (type);
2667 align = GET_MODE_ALIGNMENT (mode);
2668 if (align < PARM_BOUNDARY)
2669 align = PARM_BOUNDARY;
2672 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2673 make an exception for SSE modes since these require 128bit
2676 The handling here differs from field_alignment. ICC aligns MMX
2677 arguments to 4 byte boundaries, while structure fields are aligned
2678 to 8 byte boundaries. */
/* Demote to PARM_BOUNDARY unless an SSE mode / embedded 128-bit vector
   justifies the larger alignment.  */
2681 if (!SSE_REG_MODE_P (mode))
2682 align = PARM_BOUNDARY;
2686 if (!contains_128bit_aligned_vector_p (type))
2687 align = PARM_BOUNDARY;
2695 /* Return true if N is a possible register number of function value. */
2697 ix86_function_value_regno_p (int regno)
2701 return ((regno) == 0
2702 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2703 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2705 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2706 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2707 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2710 /* Define how to find the value returned by a function.
2711 VALTYPE is the data type of the value (as a tree).
2712 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2713 otherwise, FUNC is 0. */
/* Return the RTL for the location in which a value of type VALTYPE is
   returned: via construct_container on x86-64, via ix86_value_regno's
   single register otherwise.
   NOTE(review): extract is missing lines (return type, braces, the
   TARGET_64BIT test, the NULL check guarding the fallback REG); comments
   describe visible logic only.  */
2715 ix86_function_value (tree valtype)
2719 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2720 REGPARM_MAX, SSE_REGPARM_MAX,
2721 x86_64_int_return_registers, 0);
2722 /* For zero sized structures, construct_container return NULL, but we need
2723 to keep rest of compiler happy by returning meaningful value. */
2725 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
/* Non-64-bit path: single register chosen from the mode.  */
2729 return gen_rtx_REG (TYPE_MODE (valtype),
2730 ix86_value_regno (TYPE_MODE (valtype)));
2733 /* Return false iff type is returned in memory. */
/* Decide whether a value of TYPE is returned in memory rather than in
   registers.  (The comment above says "Return false iff ..." -- on x86-64
   the answer comes from examine_argument; the 32-bit rules follow.)
   NOTE(review): extract is missing many lines (return type, braces, most
   size-based return statements, the remainder of the warning call);
   comments describe visible logic only.  */
2735 ix86_return_in_memory (tree type)
2737 int needed_intregs, needed_sseregs, size;
2738 enum machine_mode mode = TYPE_MODE (type);
/* x86-64: in memory exactly when classification fails.  */
2741 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
2743 if (mode == BLKmode)
2746 size = int_size_in_bytes (type);
2748 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
2751 if (VECTOR_MODE_P (mode) || mode == TImode)
2753 /* User-created vectors small enough to fit in EAX. */
2757 /* MMX/3dNow values are returned on the stack, since we've
2758 got to EMMS/FEMMS before returning. */
2762 /* SSE values are returned in XMM0. */
2763 /* ??? Except when it doesn't exist? We have a choice of
2764 either (1) being abi incompatible with a -march switch,
2765 or (2) generating an error here. Given no good solution,
2766 I think the safest thing is one warning. The user won't
2767 be able to use -Werror, but... */
2778 warning ("SSE vector return without SSE enabled "
2793 /* Define how to find the value returned by a library function
2794 assuming the value has mode MODE. */
/* Return the RTL register holding a libcall result of MODE.
   NOTE(review): extract is missing the conditions (mode switch/if chain)
   that select among these returns -- only the return statements for the
   SSE, x87, integer, and default cases are visible.  */
2796 ix86_libcall_value (enum machine_mode mode)
2806 return gen_rtx_REG (mode, FIRST_SSE_REG);
2809 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2814 return gen_rtx_REG (mode, 0);
/* Non-64-bit fallback: pick the register from the mode.  */
2818 return gen_rtx_REG (mode, ix86_value_regno (mode));
2821 /* Given a mode, return the register to use for a return value. */
2824 ix86_value_regno (enum machine_mode mode)
2826 /* Floating point return values in %st(0). */
2827 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2828 return FIRST_FLOAT_REG;
2829 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
2830 we prevent this case when sse is not available. */
2831 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
2832 return FIRST_SSE_REG;
2833 /* Everything else in %eax. */
2837 /* Create the va_list data type. */
/* Build the tree for the target's __builtin_va_list type: a plain
   char* on 32-bit, and on x86-64 a one-element array of a record
   { unsigned gp_offset; unsigned fp_offset;
     void *overflow_arg_area; void *reg_save_area; }.
   NOTE(review): extract is missing lines (return type, braces, the
   TARGET_64BIT test, and the pointer-type arguments of the f_ovf/f_sav
   build_decl calls).  */
2840 ix86_build_builtin_va_list (void)
2842 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2844 /* For i386 we use plain pointer to argument area. */
2846 return build_pointer_type (char_type_node);
/* x86-64: build the __va_list_tag RECORD_TYPE and its four fields.  */
2848 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2849 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2851 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2852 unsigned_type_node);
2853 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2854 unsigned_type_node);
2855 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2857 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2860 DECL_FIELD_CONTEXT (f_gpr) = record;
2861 DECL_FIELD_CONTEXT (f_fpr) = record;
2862 DECL_FIELD_CONTEXT (f_ovf) = record;
2863 DECL_FIELD_CONTEXT (f_sav) = record;
/* Chain the fields onto the record and lay it out.  */
2865 TREE_CHAIN (record) = type_decl;
2866 TYPE_NAME (record) = type_decl;
2867 TYPE_FIELDS (record) = f_gpr;
2868 TREE_CHAIN (f_gpr) = f_fpr;
2869 TREE_CHAIN (f_fpr) = f_ovf;
2870 TREE_CHAIN (f_ovf) = f_sav;
2872 layout_type (record);
2874 /* The correct type is an array type of one element. */
2875 return build_array_type (record, build_index_type (size_zero_node));
2878 /* Perform any needed actions needed for a function that is receiving a
2879 variable number of arguments.
2883 MODE and TYPE are the mode and type of the current parameter.
2885 PRETEND_SIZE is a variable that should be set to the amount of stack
2886 that must be pushed by the prolog to pretend that our caller pushed
2889 Normally, this macro will push all remaining incoming registers on the
2890 stack and set PRETEND_SIZE to the length of the registers pushed. */
/* Emit the x86-64 varargs prologue: spill the remaining unnamed integer
   parameter registers into the register save area, and (via the
   sse_prologue_save pattern) conditionally spill SSE registers based on
   the hidden %al count.  See the comment block above for the macro
   contract.
   NOTE(review): extract is missing many lines (return type, braces,
   TARGET_64BIT guard, several local declarations, the UNSPEC operand of
   the MULT); comments describe visible logic only.  */
2893 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
2894 tree type, int *pretend_size ATTRIBUTE_UNUSED,
2897 CUMULATIVE_ARGS next_cum;
2898 rtx save_area = NULL_RTX, mem;
2911 /* Indicate to allocate space on the stack for varargs save area. */
2912 ix86_save_varrargs_registers = 1;
2914 cfun->stack_alignment_needed = 128;
2916 fntype = TREE_TYPE (current_function_decl);
2917 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2918 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2919 != void_type_node));
2921 /* For varargs, we do not want to skip the dummy va_dcl argument.
2922 For stdargs, we do want to skip the last named argument. */
2925 function_arg_advance (&next_cum, mode, type, 1);
2928 save_area = frame_pointer_rtx;
2930 set = get_varargs_alias_set ();
/* Spill each still-unused integer parameter register into its slot.  */
2932 for (i = next_cum.regno; i < ix86_regparm; i++)
2934 mem = gen_rtx_MEM (Pmode,
2935 plus_constant (save_area, i * UNITS_PER_WORD));
2936 set_mem_alias_set (mem, set);
2937 emit_move_insn (mem, gen_rtx_REG (Pmode,
2938 x86_64_int_parameter_registers[i]));
2941 if (next_cum.sse_nregs)
2943 /* Now emit code to save SSE registers. The AX parameter contains number
2944 of SSE parameter registers used to call this function. We use
2945 sse_prologue_save insn template that produces computed jump across
2946 SSE saves. We need some preparation work to get this working. */
2948 label = gen_label_rtx ();
2949 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2951 /* Compute address to jump to :
2952 label - 5*eax + nnamed_sse_arguments*5 */
2953 tmp_reg = gen_reg_rtx (Pmode);
2954 nsse_reg = gen_reg_rtx (Pmode);
/* %al (QImode reg 0) carries the count of SSE registers actually used
   by the caller; zero-extend it for the address arithmetic.  */
2955 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2956 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2957 gen_rtx_MULT (Pmode, nsse_reg,
2959 if (next_cum.sse_regno)
2962 gen_rtx_CONST (DImode,
2963 gen_rtx_PLUS (DImode,
2965 GEN_INT (next_cum.sse_regno * 4))));
2967 emit_move_insn (nsse_reg, label_ref);
2968 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2970 /* Compute address of memory block we save into. We always use pointer
2971 pointing 127 bytes after first byte to store - this is needed to keep
2972 instruction size limited by 4 bytes. */
2973 tmp_reg = gen_reg_rtx (Pmode);
2974 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2975 plus_constant (save_area,
2976 8 * REGPARM_MAX + 127)));
2977 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2978 set_mem_alias_set (mem, set);
2979 set_mem_align (mem, BITS_PER_WORD);
2981 /* And finally do the dirty job! */
2982 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2983 GEN_INT (next_cum.sse_regno), label));
2988 /* Implement va_start. */
/* Implement va_start: initialize the four fields of the x86-64 va_list
   (gp_offset, fp_offset, overflow_arg_area, reg_save_area) from the
   counts recorded in current_function_args_info.  32-bit targets fall
   back to the generic expander.
   NOTE(review): extract is missing lines (return type, braces, the
   TARGET_64BIT test and early return); comments describe visible logic
   only.  */
2991 ix86_va_start (tree valist, rtx nextarg)
2993 HOST_WIDE_INT words, n_gpr, n_fpr;
2994 tree f_gpr, f_fpr, f_ovf, f_sav;
2995 tree gpr, fpr, ovf, sav, t;
2997 /* Only 64bit target needs something special. */
3000 std_expand_builtin_va_start (valist, nextarg);
/* Look up the va_list record fields and build COMPONENT_REFs to them.  */
3004 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3005 f_fpr = TREE_CHAIN (f_gpr);
3006 f_ovf = TREE_CHAIN (f_fpr);
3007 f_sav = TREE_CHAIN (f_ovf);
3009 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3010 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3011 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3012 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3013 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3015 /* Count number of gp and fp argument registers used. */
3016 words = current_function_args_info.words;
3017 n_gpr = current_function_args_info.regno;
3018 n_fpr = current_function_args_info.sse_regno;
3020 if (TARGET_DEBUG_ARG)
3021 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3022 (int) words, (int) n_gpr, (int) n_fpr);
/* gp_offset = bytes of integer-register area already consumed.  */
3024 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3025 build_int_2 (n_gpr * 8, 0));
3026 TREE_SIDE_EFFECTS (t) = 1;
3027 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
/* fp_offset starts past the 8*REGPARM_MAX integer save slots.  */
3029 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3030 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
3031 TREE_SIDE_EFFECTS (t) = 1;
3032 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3034 /* Find the overflow area. */
3035 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3037 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3038 build_int_2 (words * UNITS_PER_WORD, 0));
3039 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3040 TREE_SIDE_EFFECTS (t) = 1;
3041 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3043 /* Find the register save area.
3044 Prologue of the function save it right above stack frame. */
3045 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3046 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3047 TREE_SIDE_EFFECTS (t) = 1;
3048 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3051 /* Implement va_arg. */
/* Implement va_arg for x86-64: emit RTL that fetches the next argument of
   TYPE either from the register save area (when gp_offset/fp_offset show
   enough registers remain) or from the overflow (stack) area, returning
   an rtx for the value's address.  32-bit targets use the generic
   expander.
   NOTE(review): this extract is incomplete -- embedded original line
   numbers jump throughout, so braces, `else` arms, several declarations
   and the final return are missing from view.  Comments annotate visible
   logic only.  */
3053 ix86_va_arg (tree valist, tree type)
3055 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3056 tree f_gpr, f_fpr, f_ovf, f_sav;
3057 tree gpr, fpr, ovf, sav, t;
3059 rtx lab_false, lab_over = NULL_RTX;
3064 /* Only 64bit target needs something special. */
3067 return std_expand_builtin_va_arg (valist, type);
/* Build COMPONENT_REFs for the four va_list fields.  */
3070 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3071 f_fpr = TREE_CHAIN (f_gpr);
3072 f_ovf = TREE_CHAIN (f_fpr);
3073 f_sav = TREE_CHAIN (f_ovf);
3075 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3076 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3077 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3078 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3079 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3081 size = int_size_in_bytes (type);
3084 /* Passed by reference. */
3086 type = build_pointer_type (type);
3087 size = int_size_in_bytes (type);
3089 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3091 container = construct_container (TYPE_MODE (type), type, 0,
3092 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3094 * Pull the value out of the saved registers ...
3097 addr_rtx = gen_reg_rtx (Pmode);
/* Register path: only taken when construct_container found a register
   placement (container != NULL).  */
3101 rtx int_addr_rtx, sse_addr_rtx;
3102 int needed_intregs, needed_sseregs;
3105 lab_over = gen_label_rtx ();
3106 lab_false = gen_label_rtx ();
3108 examine_argument (TYPE_MODE (type), type, 0,
3109 &needed_intregs, &needed_sseregs);
/* Over-aligned types cannot be read straight out of the save area.  */
3112 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
3113 || TYPE_ALIGN (type) > 128);
3115 /* In case we are passing structure, verify that it is consecutive block
3116 on the register save area. If not we need to do moves. */
3117 if (!need_temp && !REG_P (container))
3119 /* Verify that all registers are strictly consecutive */
3120 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3124 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3126 rtx slot = XVECEXP (container, 0, i);
3127 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3128 || INTVAL (XEXP (slot, 1)) != i * 16)
3136 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3138 rtx slot = XVECEXP (container, 0, i);
3139 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3140 || INTVAL (XEXP (slot, 1)) != i * 8)
3147 int_addr_rtx = addr_rtx;
3148 sse_addr_rtx = addr_rtx;
3152 int_addr_rtx = gen_reg_rtx (Pmode);
3153 sse_addr_rtx = gen_reg_rtx (Pmode);
3155 /* First ensure that we fit completely in registers. */
/* Branch to lab_false (stack path) when gp_offset is already past the
   last slot that leaves room for needed_intregs registers.  */
3158 emit_cmp_and_jump_insns (expand_expr
3159 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
3160 GEN_INT ((REGPARM_MAX - needed_intregs +
3161 1) * 8), GE, const1_rtx, SImode,
3166 emit_cmp_and_jump_insns (expand_expr
3167 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
3168 GEN_INT ((SSE_REGPARM_MAX -
3169 needed_sseregs + 1) * 16 +
3170 REGPARM_MAX * 8), GE, const1_rtx,
3171 SImode, 1, lab_false);
3174 /* Compute index to start of area used for integer regs. */
3177 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
3178 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
3179 if (r != int_addr_rtx)
3180 emit_move_insn (int_addr_rtx, r);
3184 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
3185 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
3186 if (r != sse_addr_rtx)
3187 emit_move_insn (sse_addr_rtx, r);
/* need_temp path: copy each piece out of the save area into a stack
   temporary, then hand back the temporary's address.  */
3195 /* Never use the memory itself, as it has the alias set. */
3196 x = XEXP (assign_temp (type, 0, 1, 0), 0);
3197 mem = gen_rtx_MEM (BLKmode, x);
3198 force_operand (x, addr_rtx);
3199 set_mem_alias_set (mem, get_varargs_alias_set ());
3200 set_mem_align (mem, BITS_PER_UNIT);
3202 for (i = 0; i < XVECLEN (container, 0); i++)
3204 rtx slot = XVECEXP (container, 0, i);
3205 rtx reg = XEXP (slot, 0);
3206 enum machine_mode mode = GET_MODE (reg);
3212 if (SSE_REGNO_P (REGNO (reg)))
3214 src_addr = sse_addr_rtx;
3215 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3219 src_addr = int_addr_rtx;
3220 src_offset = REGNO (reg) * 8;
3222 src_mem = gen_rtx_MEM (mode, src_addr);
3223 set_mem_alias_set (src_mem, get_varargs_alias_set ());
3224 src_mem = adjust_address (src_mem, mode, src_offset);
3225 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
3226 emit_move_insn (dest_mem, src_mem);
/* Bump gp_offset / fp_offset past the registers just consumed.  */
3233 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3234 build_int_2 (needed_intregs * 8, 0));
3235 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3236 TREE_SIDE_EFFECTS (t) = 1;
3237 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3242 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3243 build_int_2 (needed_sseregs * 16, 0));
3244 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3245 TREE_SIDE_EFFECTS (t) = 1;
3246 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3249 emit_jump_insn (gen_jump (lab_over));
3251 emit_label (lab_false);
3254 /* ... otherwise out of the overflow area. */
3256 /* Care for on-stack alignment if needed. */
3257 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3261 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3262 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3263 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3267 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3269 emit_move_insn (addr_rtx, r);
/* Advance overflow_arg_area past this argument.  */
3272 build (PLUS_EXPR, TREE_TYPE (t), t,
3273 build_int_2 (rsize * UNITS_PER_WORD, 0));
3274 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3275 TREE_SIDE_EFFECTS (t) = 1;
3276 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3279 emit_label (lab_over);
/* By-reference arguments: dereference the stored pointer once more.  */
3283 r = gen_rtx_MEM (Pmode, addr_rtx);
3284 set_mem_alias_set (r, get_varargs_alias_set ());
3285 emit_move_insn (addr_rtx, r);
3291 /* Return nonzero if OP is either a i387 or SSE fp register. */
/* MODE is ignored; only the register class of OP is examined. */
3293 any_fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3295 return ANY_FP_REG_P (op);
3298 /* Return nonzero if OP is an i387 fp register. */
3300 fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3302 return FP_REG_P (op);
3305 /* Return nonzero if OP is a non-fp register_operand. */
3307 register_and_not_any_fp_reg_operand (rtx op, enum machine_mode mode)
3309 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3312 /* Return nonzero if OP is a register operand other than an
3313 i387 fp register. */
/* Unlike register_and_not_any_fp_reg_operand, SSE registers are accepted
   here; only i387 stack registers are excluded. */
3315 register_and_not_fp_reg_operand (rtx op, enum machine_mode mode)
3317 return register_operand (op, mode) && !FP_REG_P (op);
3320 /* Return nonzero if OP is general operand representable on x86_64. */
/* On x86_64 an immediate is acceptable only if it fits in the 32-bit
   sign-extended immediate field of the instruction encoding. */
3323 x86_64_general_operand (rtx op, enum machine_mode mode)
3326 return general_operand (op, mode);
3327 if (nonimmediate_operand (op, mode))
3329 return x86_64_sign_extended_value (op);
3332 /* Return nonzero if OP is general operand representable on x86_64
3333 as either sign extended or zero extended constant. */
3336 x86_64_szext_general_operand (rtx op, enum machine_mode mode)
3339 return general_operand (op, mode);
3340 if (nonimmediate_operand (op, mode))
3342 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3345 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3348 x86_64_nonmemory_operand (rtx op, enum machine_mode mode)
3351 return nonmemory_operand (op, mode);
3352 if (register_operand (op, mode))
3354 return x86_64_sign_extended_value (op);
3357 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
/* movabs can carry a full 64-bit immediate, but under PIC symbolic
   constants must still be rejected since they need relocation. */
3360 x86_64_movabs_operand (rtx op, enum machine_mode mode)
3362 if (!TARGET_64BIT || !flag_pic)
3363 return nonmemory_operand (op, mode);
3364 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3366 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3371 /* Return nonzero if OPNUM's MEM should be matched
3372 in movabs* patterns. */
/* INSN is the candidate insn; OPNUM selects which side of its SET
   (0 = destination, 1 = source) should be the MEM being checked. */
3375 ix86_check_movabs (rtx insn, int opnum)
3379 set = PATTERN (insn);
/* A PARALLEL (e.g. with a clobber) wraps the real SET in element 0. */
3380 if (GET_CODE (set) == PARALLEL)
3381 set = XVECEXP (set, 0, 0);
3382 if (GET_CODE (set) != SET)
3384 mem = XEXP (set, opnum);
/* Peel any paradoxical/normal SUBREGs to reach the underlying MEM. */
3385 while (GET_CODE (mem) == SUBREG)
3386 mem = SUBREG_REG (mem);
3387 if (GET_CODE (mem) != MEM)
/* Volatile memory is only acceptable when the caller set volatile_ok. */
3389 return (volatile_ok || !MEM_VOLATILE_P (mem));
3392 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
/* Like x86_64_nonmemory_operand, but also admits zero-extended
   32-bit constants. */
3395 x86_64_szext_nonmemory_operand (rtx op, enum machine_mode mode)
3398 return nonmemory_operand (op, mode);
3399 if (register_operand (op, mode))
3401 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3404 /* Return nonzero if OP is immediate operand representable on x86_64. */
3407 x86_64_immediate_operand (rtx op, enum machine_mode mode)
3410 return immediate_operand (op, mode);
3411 return x86_64_sign_extended_value (op);
3414 /* Return nonzero if OP is an immediate operand representable on x86_64
   as a zero-extended 32-bit constant. */
3417 x86_64_zext_immediate_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3419 return x86_64_zero_extended_value (op);
3422 /* Return nonzero if OP is (const_int 1), else return zero. */
/* Pointer comparison suffices: const_int 1 is a shared unique rtx. */
3425 const_int_1_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3427 return op == const1_rtx;
3430 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3431 for shift & compare patterns, as shifting by 0 does not change flags),
3432 else return zero. */
3435 const_int_1_31_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3437 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3440 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3441 reference and a constant. */
3444 symbolic_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3446 switch (GET_CODE (op))
/* Inside a CONST: accept a bare symbol/label or a PIC-related UNSPEC. */
3454 if (GET_CODE (op) == SYMBOL_REF
3455 || GET_CODE (op) == LABEL_REF
3456 || (GET_CODE (op) == UNSPEC
3457 && (XINT (op, 1) == UNSPEC_GOT
3458 || XINT (op, 1) == UNSPEC_GOTOFF
3459 || XINT (op, 1) == UNSPEC_GOTPCREL)))
/* Otherwise require (plus SYM (const_int ...)). */
3461 if (GET_CODE (op) != PLUS
3462 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3466 if (GET_CODE (op) == SYMBOL_REF
3467 || GET_CODE (op) == LABEL_REF)
3469 /* Only @GOTOFF gets offsets. */
3470 if (GET_CODE (op) != UNSPEC
3471 || XINT (op, 1) != UNSPEC_GOTOFF)
3474 op = XVECEXP (op, 0, 0);
3475 if (GET_CODE (op) == SYMBOL_REF
3476 || GET_CODE (op) == LABEL_REF)
3485 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3488 pic_symbolic_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3490 if (GET_CODE (op) != CONST)
/* 64-bit form: GOTPCREL, possibly with an added constant offset. */
3495 if (GET_CODE (op) == UNSPEC
3496 && XINT (op, 1) == UNSPEC_GOTPCREL)
3498 if (GET_CODE (op) == PLUS
3499 && GET_CODE (XEXP (op, 0)) == UNSPEC
3500 && XINT (XEXP (op, 0), 1) == UNSPEC_GOTPCREL)
/* 32-bit form: any UNSPEC, bare or offset by a CONST_INT. */
3505 if (GET_CODE (op) == UNSPEC)
3507 if (GET_CODE (op) != PLUS
3508 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3511 if (GET_CODE (op) == UNSPEC)
3517 /* Return true if OP is a symbolic operand that resolves locally. */
/* Strip an outer (const (plus SYM (const_int N))) wrapper first. */
3522 if (GET_CODE (op) == CONST
3523 && GET_CODE (XEXP (op, 0)) == PLUS
3524 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3525 op = XEXP (XEXP (op, 0), 0);
3527 if (GET_CODE (op) == LABEL_REF)
3530 if (GET_CODE (op) != SYMBOL_REF)
3533 if (SYMBOL_REF_LOCAL_P (op))
3536 /* There is, however, a not insubstantial body of code in the rest of
3537 the compiler that assumes it can just stick the results of
3538 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3539 /* ??? This is a hack. Should update the body of the compiler to
3540 always create a DECL and invoke targetm.encode_section_info. */
3541 if (strncmp (XSTR (op, 0), internal_label_prefix,
3542 internal_label_prefix_len) == 0)
3548 /* Test for various thread-local symbols. */
/* Returns the TLS model of OP (nonzero iff OP is a TLS SYMBOL_REF). */
3551 tls_symbolic_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3553 if (GET_CODE (op) != SYMBOL_REF)
3555 return SYMBOL_REF_TLS_MODEL (op);
/* Helper: nonzero iff OP is a TLS symbol with exactly model KIND. */
3559 tls_symbolic_operand_1 (rtx op, enum tls_model kind)
3561 if (GET_CODE (op) != SYMBOL_REF)
3563 return SYMBOL_REF_TLS_MODEL (op) == kind;
/* The four predicates below are thin per-model wrappers around
   tls_symbolic_operand_1. */
3567 global_dynamic_symbolic_operand (register rtx op,
3568 enum machine_mode mode ATTRIBUTE_UNUSED)
3570 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3574 local_dynamic_symbolic_operand (register rtx op,
3575 enum machine_mode mode ATTRIBUTE_UNUSED)
3577 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3581 initial_exec_symbolic_operand (register rtx op,
3582 enum machine_mode mode ATTRIBUTE_UNUSED)
3584 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3588 local_exec_symbolic_operand (register rtx op,
3589 enum machine_mode mode ATTRIBUTE_UNUSED)
3591 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3594 /* Test for a valid operand for a call instruction. Don't allow the
3595 arg pointer register or virtual regs since they may decay into
3596 reg + const, which the patterns can't handle. */
3599 call_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3601 /* Disallow indirect through a virtual register. This leads to
3602 compiler aborts when trying to eliminate them. */
3603 if (GET_CODE (op) == REG
3604 && (op == arg_pointer_rtx
3605 || op == frame_pointer_rtx
3606 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3607 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3610 /* Disallow `call 1234'. Due to varying assembler lameness this
3611 gets either rejected or translated to `call .+1234'. */
3612 if (GET_CODE (op) == CONST_INT)
3615 /* Explicitly allow SYMBOL_REF even if pic. */
3616 if (GET_CODE (op) == SYMBOL_REF)
3619 /* Otherwise we can allow any general_operand in the address. */
3620 return general_operand (op, Pmode);
3623 /* Test for a valid operand for a call instruction. Don't allow the
3624 arg pointer register or virtual regs since they may decay into
3625 reg + const, which the patterns can't handle. */
/* Sibcall variant: stricter than call_insn_operand -- after the
   SYMBOL_REF case only plain registers are permitted, since the
   epilogue has already been emitted when the sibcall runs. */
3628 sibcall_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3630 /* Disallow indirect through a virtual register. This leads to
3631 compiler aborts when trying to eliminate them. */
3632 if (GET_CODE (op) == REG
3633 && (op == arg_pointer_rtx
3634 || op == frame_pointer_rtx
3635 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3636 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3639 /* Explicitly allow SYMBOL_REF even if pic. */
3640 if (GET_CODE (op) == SYMBOL_REF)
3643 /* Otherwise we can only allow register operands. */
3644 return register_operand (op, Pmode);
/* Return nonzero if OP is a constant call address: a SYMBOL_REF,
   optionally offset by a CONST_INT inside a CONST wrapper. */
3648 constant_call_address_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3650 if (GET_CODE (op) == CONST
3651 && GET_CODE (XEXP (op, 0)) == PLUS
3652 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3653 op = XEXP (XEXP (op, 0), 0);
3654 return GET_CODE (op) == SYMBOL_REF;
3657 /* Match exactly zero and one. */
3660 const0_operand (register rtx op, enum machine_mode mode)
3662 return op == CONST0_RTX (mode);
3666 const1_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3668 return op == const1_rtx;
3671 /* Match 2, 4, or 8. Used for leal multiplicands. */
3674 const248_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3676 return (GET_CODE (op) == CONST_INT
3677 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
/* Small-range CONST_INT matchers, used e.g. for SSE shuffle/immediate
   fields of the corresponding bit widths. */
3681 const_0_to_3_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3683 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 4);
3687 const_0_to_7_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3689 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 8);
3693 const_0_to_15_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3695 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 16);
3699 const_0_to_255_operand (register rtx op,
3700 enum machine_mode mode ATTRIBUTE_UNUSED)
3702 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 256);
3706 /* True if this is a constant appropriate for an increment or decrement. */
3709 incdec_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3711 /* On Pentium4, the inc and dec operations cause an extra dependency on flag
3712 registers, since carry flag is not set. */
3713 if (TARGET_PENTIUM4 && !optimize_size)
3715 return op == const1_rtx || op == constm1_rtx;
3718 /* Return nonzero if OP is acceptable as operand of DImode shift
/* In 32-bit mode DImode shifts are expanded, so memory operands are
   fine; in 64-bit mode the hardware shift needs a register. */
3722 shiftdi_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3725 return nonimmediate_operand (op, mode);
3727 return register_operand (op, mode);
3730 /* Return false if this is the stack pointer, or any other fake
3731 register eliminable to the stack pointer. Otherwise, this is
3734 This is used to prevent esp from being used as an index reg.
3735 Which would only happen in pathological cases. */
3738 reg_no_sp_operand (register rtx op, enum machine_mode mode)
/* Look through a SUBREG to the underlying hard/pseudo register. */
3741 if (GET_CODE (t) == SUBREG)
3743 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3746 return register_operand (op, mode);
/* Return nonzero if OP is an MMX register. */
3750 mmx_reg_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3752 return MMX_REG_P (op);
3755 /* Return false if this is any eliminable register. Otherwise
3759 general_no_elim_operand (register rtx op, enum machine_mode mode)
3762 if (GET_CODE (t) == SUBREG)
3764 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3765 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3766 || t == virtual_stack_dynamic_rtx)
/* Also reject any other register in the virtual range. */
3769 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3770 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3773 return general_operand (op, mode);
3776 /* Return false if this is any eliminable register. Otherwise
3777 register_operand or const_int. */
3780 nonmemory_no_elim_operand (register rtx op, enum machine_mode mode)
3783 if (GET_CODE (t) == SUBREG)
3785 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3786 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3787 || t == virtual_stack_dynamic_rtx)
3790 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3793 /* Return false if this is any eliminable register or stack register,
3794 otherwise work like register_operand. */
/* %esp can never be an index register in an address, hence the
   extra STACK_POINTER_REGNUM check. */
3797 index_register_operand (register rtx op, enum machine_mode mode)
3800 if (GET_CODE (t) == SUBREG)
3804 if (t == arg_pointer_rtx
3805 || t == frame_pointer_rtx
3806 || t == virtual_incoming_args_rtx
3807 || t == virtual_stack_vars_rtx
3808 || t == virtual_stack_dynamic_rtx
3809 || REGNO (t) == STACK_POINTER_REGNUM)
3812 return general_operand (op, mode);
3815 /* Return true if op is a Q_REGS class register. */
/* Q_REGS are the four registers with byte-addressable low parts
   (eax/ebx/ecx/edx). */
3818 q_regs_operand (register rtx op, enum machine_mode mode)
3820 if (mode != VOIDmode && GET_MODE (op) != mode)
3822 if (GET_CODE (op) == SUBREG)
3823 op = SUBREG_REG (op);
3824 return ANY_QI_REG_P (op);
3827 /* Return true if op is an flags register. */
3830 flags_reg_operand (register rtx op, enum machine_mode mode)
3832 if (mode != VOIDmode && GET_MODE (op) != mode)
3834 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3837 /* Return true if op is a NON_Q_REGS class register. */
3840 non_q_regs_operand (register rtx op, enum machine_mode mode)
3842 if (mode != VOIDmode && GET_MODE (op) != mode)
3844 if (GET_CODE (op) == SUBREG)
3845 op = SUBREG_REG (op);
3846 return NON_QI_REG_P (op);
/* Return nonzero if OP is a constant-pool load whose vector constant
   has all elements beyond the first equal to zero, i.e. a scalar
   value that is implicitly zero-extended into the full vector. */
3850 zero_extended_scalar_load_operand (rtx op,
3851 enum machine_mode mode ATTRIBUTE_UNUSED)
3854 if (GET_CODE (op) != MEM)
3856 op = maybe_get_pool_constant (op);
3859 if (GET_CODE (op) != CONST_VECTOR)
/* Compute the element count from the vector and element sizes. */
3862 (GET_MODE_SIZE (GET_MODE (op)) /
3863 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
/* Every element except element 0 must be the zero constant. */
3864 for (n_elts--; n_elts > 0; n_elts--)
3866 rtx elt = CONST_VECTOR_ELT (op, n_elts);
3867 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
3873 /* Return 1 when OP is operand acceptable for standard SSE move. */
/* Accepts any nonimmediate operand, or the all-zeros constant of
   the operand's mode. */
3875 vector_move_operand (rtx op, enum machine_mode mode)
3877 if (nonimmediate_operand (op, mode))
3879 if (GET_MODE (op) != mode && mode != VOIDmode)
3881 return (op == CONST0_RTX (GET_MODE (op)));
3884 /* Return true if op is a valid address, and does not contain
3885 a segment override. */
3888 no_seg_address_operand (register rtx op, enum machine_mode mode)
3890 struct ix86_address parts;
3892 if (! address_operand (op, mode))
3895 if (! ix86_decompose_address (op, &parts))
3898 return parts.seg == SEG_DEFAULT;
3901 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
/* CMPSS/CMPPS encode only a fixed set of predicates; some extra codes
   are acceptable when IEEE semantics are not required. */
3904 sse_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3906 enum rtx_code code = GET_CODE (op);
3909 /* Operations supported directly. */
3919 /* These are equivalent to ones above in non-IEEE comparisons. */
3926 return !TARGET_IEEE_FP;
3931 /* Return 1 if OP is a valid comparison operator in valid mode. */
3933 ix86_comparison_operator (register rtx op, enum machine_mode mode)
3935 enum machine_mode inmode;
3936 enum rtx_code code = GET_CODE (op);
3937 if (mode != VOIDmode && GET_MODE (op) != mode)
/* '<' is the RTX class of all comparison codes. */
3939 if (GET_RTX_CLASS (code) != '<')
3941 inmode = GET_MODE (XEXP (op, 0));
/* FP comparisons are valid only when expressible without helper
   (bypass/second) comparisons. */
3943 if (inmode == CCFPmode || inmode == CCFPUmode)
3945 enum rtx_code second_code, bypass_code;
3946 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3947 return (bypass_code == NIL && second_code == NIL);
/* Integer comparisons: which codes are valid depends on how much
   flag information the CC mode guarantees. */
3954 if (inmode == CCmode || inmode == CCGCmode
3955 || inmode == CCGOCmode || inmode == CCNOmode)
3958 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3959 if (inmode == CCmode)
3963 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3971 /* Return 1 if OP is a valid comparison operator testing carry flag
3974 ix86_carry_flag_operator (register rtx op, enum machine_mode mode)
3976 enum machine_mode inmode;
3977 enum rtx_code code = GET_CODE (op);
3979 if (mode != VOIDmode && GET_MODE (op) != mode)
3981 if (GET_RTX_CLASS (code) != '<')
3983 inmode = GET_MODE (XEXP (op, 0));
/* Must be a comparison of the flags register against zero.
   NOTE(review): 17 is the hard register number of the flags register;
   prefer the FLAGS_REG macro used by flags_reg_operand -- confirm the
   macro equals 17 in this tree before changing. */
3984 if (GET_CODE (XEXP (op, 0)) != REG
3985 || REGNO (XEXP (op, 0)) != 17
3986 || XEXP (op, 1) != const0_rtx)
/* For FP modes, map the FP comparison to its integer equivalent and
   require that no helper comparison is needed. */
3989 if (inmode == CCFPmode || inmode == CCFPUmode)
3991 enum rtx_code second_code, bypass_code;
3993 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3994 if (bypass_code != NIL || second_code != NIL)
3996 code = ix86_fp_compare_code_to_integer (code);
3998 else if (inmode != CCmode)
4003 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
4006 fcmov_comparison_operator (register rtx op, enum machine_mode mode)
4008 enum machine_mode inmode;
4009 enum rtx_code code = GET_CODE (op);
4011 if (mode != VOIDmode && GET_MODE (op) != mode)
4013 if (GET_RTX_CLASS (code) != '<')
4015 inmode = GET_MODE (XEXP (op, 0));
/* FP comparisons must first be reduced to a single integer-style
   condition code before checking fcmov's limited repertoire. */
4016 if (inmode == CCFPmode || inmode == CCFPUmode)
4018 enum rtx_code second_code, bypass_code;
4020 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4021 if (bypass_code != NIL || second_code != NIL)
4023 code = ix86_fp_compare_code_to_integer (code);
4025 /* i387 supports just limited amount of conditional codes. */
4028 case LTU: case GTU: case LEU: case GEU:
4029 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
4032 case ORDERED: case UNORDERED:
4040 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
4043 promotable_binary_operator (register rtx op,
4044 enum machine_mode mode ATTRIBUTE_UNUSED)
4046 switch (GET_CODE (op))
4049 /* Modern CPUs have same latency for HImode and SImode multiply,
4050 but 386 and 486 do HImode multiply faster. */
/* NOTE(review): relies on PROCESSOR_* enum ordering (everything after
   PROCESSOR_I486 being "modern") -- confirm if the enum is reordered. */
4051 return ix86_tune > PROCESSOR_I486;
4063 /* Nearly general operand, but accept any const_double, since we wish
4064 to be able to drop them into memory rather than have them get pulled
4068 cmp_fp_expander_operand (register rtx op, enum machine_mode mode)
4070 if (mode != VOIDmode && mode != GET_MODE (op))
4072 if (GET_CODE (op) == CONST_DOUBLE)
4074 return general_operand (op, mode);
4077 /* Match an SI or HImode register for a zero_extract. */
4080 ext_register_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
/* DImode is additionally allowed in 64-bit mode. */
4083 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
4084 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
4087 if (!register_operand (op, VOIDmode))
4090 /* Be careful to accept only registers having upper parts. */
/* Hard regs 0-3 (a/b/c/d) have %ah-style high-byte parts; pseudos
   (regno > LAST_VIRTUAL_REGISTER) can still be allocated to them. */
4091 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
4092 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
4095 /* Return 1 if this is a valid binary floating-point operation.
4096 OP is the expression matched, and MODE is its mode. */
4099 binary_fp_operator (register rtx op, enum machine_mode mode)
4101 if (mode != VOIDmode && mode != GET_MODE (op))
4104 switch (GET_CODE (op))
4110 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
/* Trivial code matchers used in insn conditions. */
4118 mult_operator (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4120 return GET_CODE (op) == MULT;
4124 div_operator (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4126 return GET_CODE (op) == DIV;
/* Match any commutative ('c') or plain two-operand ('2') rtx code. */
4130 arith_or_logical_operator (rtx op, enum machine_mode mode)
4132 return ((mode == VOIDmode || GET_MODE (op) == mode)
4133 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
4134 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
4137 /* Returns 1 if OP is memory operand with a displacement. */
4140 memory_displacement_operand (register rtx op, enum machine_mode mode)
4142 struct ix86_address parts;
4144 if (! memory_operand (op, mode))
4147 if (! ix86_decompose_address (XEXP (op, 0), &parts))
4150 return parts.disp != NULL_RTX;
4153 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
4154 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4156 ??? It seems likely that this will only work because cmpsi is an
4157 expander, and no actual insns use this. */
4160 cmpsi_operand (rtx op, enum machine_mode mode)
4162 if (nonimmediate_operand (op, mode))
/* Also accept (and (zero_extract X 8 8) (const_int N)), i.e. a test
   of the %ah-style high byte, as produced by testqi_ext_ccno_0. */
4165 if (GET_CODE (op) == AND
4166 && GET_MODE (op) == SImode
4167 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
4168 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
4169 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
4170 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
4171 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
4172 && GET_CODE (XEXP (op, 1)) == CONST_INT
4178 /* Returns 1 if OP is memory operand that can not be represented by the
/* i.e. one whose encoded address length is nonzero. */
4182 long_memory_operand (register rtx op, enum machine_mode mode)
4184 if (! memory_operand (op, mode))
4187 return memory_address_length (op) != 0;
4190 /* Return nonzero if the rtx is known aligned. */
/* "Aligned" here means at least 32-bit (4-byte) alignment. */
4193 aligned_operand (rtx op, enum machine_mode mode)
4195 struct ix86_address parts;
4197 if (!general_operand (op, mode))
4200 /* Registers and immediate operands are always "aligned". */
4201 if (GET_CODE (op) != MEM)
4204 /* Don't even try to do any aligned optimizations with volatiles. */
4205 if (MEM_VOLATILE_P (op))
4210 /* Pushes and pops are only valid on the stack pointer. */
4211 if (GET_CODE (op) == PRE_DEC
4212 || GET_CODE (op) == POST_INC)
4215 /* Decode the address. */
4216 if (! ix86_decompose_address (op, &parts))
4219 if (parts.base && GET_CODE (parts.base) == SUBREG)
4220 parts.base = SUBREG_REG (parts.base);
4221 if (parts.index && GET_CODE (parts.index) == SUBREG)
4222 parts.index = SUBREG_REG (parts.index);
4224 /* Look for some component that isn't known to be aligned. */
4228 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
4233 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
/* A non-constant or non-multiple-of-4 displacement spoils alignment. */
4238 if (GET_CODE (parts.disp) != CONST_INT
4239 || (INTVAL (parts.disp) & 3) != 0)
4243 /* Didn't find one -- this must be an aligned address. */
4247 /* Initialize the table of extra 80387 mathematical constants. */
4250 init_ext_80387_constants (void)
4252 static const char * cst[5] =
/* Decimal expansions of the constants loadable by dedicated x87
   instructions: log10(2), ln(2), log2(e), log2(10), and pi. */
4254 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4255 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4256 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4257 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4258 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4262 for (i = 0; i < 5; i++)
4264 real_from_string (&ext_80387_constants_table[i], cst[i]);
4265 /* Ensure each constant is rounded to XFmode precision. */
4266 real_convert (&ext_80387_constants_table[i],
4267 XFmode, &ext_80387_constants_table[i]);
4270 ext_80387_constants_init = 1;
4273 /* Return true if the constant is something that can be loaded with
4274 a special instruction. */
/* Returns an index usable by standard_80387_constant_opcode, or 0 if
   no special load instruction applies. */
4277 standard_80387_constant_p (rtx x)
4279 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4282 if (x == CONST0_RTX (GET_MODE (x)))
4284 if (x == CONST1_RTX (GET_MODE (x)))
4287 /* For XFmode constants, try to find a special 80387 instruction on
4288 those CPUs that benefit from them. */
4289 if (GET_MODE (x) == XFmode
4290 && x86_ext_80387_constants & TUNEMASK)
4295 if (! ext_80387_constants_init)
4296 init_ext_80387_constants ();
4298 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4299 for (i = 0; i < 5; i++)
4300 if (real_identical (&r, &ext_80387_constants_table[i]))
4307 /* Return the opcode of the special instruction to be used to load
4311 standard_80387_constant_opcode (rtx x)
4313 switch (standard_80387_constant_p (x))
4333 /* Return the CONST_DOUBLE representing the 80387 constant that is
4334 loaded by the specified special instruction. The argument IDX
4335 matches the return value from standard_80387_constant_p. */
4338 standard_80387_constant_rtx (int idx)
4342 if (! ext_80387_constants_init)
4343 init_ext_80387_constants ();
4359 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4363 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
/* Only the all-zeros constant qualifies (loaded via xorps/pxor). */
4366 standard_sse_constant_p (rtx x)
4368 if (x == const0_rtx)
4370 return (x == CONST0_RTX (GET_MODE (x)));
4373 /* Returns 1 if OP contains a symbol reference */
/* Recursive walk over the rtx: checks OP itself, then every 'e'
   (expression) and 'E' (vector) operand. */
4376 symbolic_reference_mentioned_p (rtx op)
4378 register const char *fmt;
4381 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4384 fmt = GET_RTX_FORMAT (GET_CODE (op));
4385 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4391 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4392 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4396 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4403 /* Return 1 if it is appropriate to emit `ret' instructions in the
4404 body of a function. Do this only if the epilogue is simple, needing a
4405 couple of insns. Prior to reloading, we can't tell how many registers
4406 must be saved, so return 0 then. Return 0 if there is no frame
4407 marker to de-allocate.
4409 If NON_SAVING_SETJMP is defined and true, then it is not possible
4410 for the epilogue to be simple, so return 0. This is a special case
4411 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4412 until final, but jump_optimize may need to know sooner if a
4416 ix86_can_use_return_insn_p (void)
4418 struct ix86_frame frame;
4420 #ifdef NON_SAVING_SETJMP
4421 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4425 if (! reload_completed || frame_pointer_needed)
4428 /* Don't allow more than 32 pop, since that's all we can do
4429 with one instruction. */
/* `ret N' encodes N as a 16-bit immediate, hence the 32768 limit. */
4430 if (current_function_pops_args
4431 && current_function_args_size >= 32768)
4434 ix86_compute_frame_layout (&frame);
4435 return frame.to_allocate == 0 && frame.nregs == 0;
4438 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
4440 x86_64_sign_extended_value (rtx value)
4442 switch (GET_CODE (value))
4444 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
4445 to be at least 32 and thus all acceptable constants are
4446 represented as CONST_INT. */
4448 if (HOST_BITS_PER_WIDE_INT == 32)
/* On a 64-bit host, check that the value survives a DImode->SImode
   round-trip, i.e. fits in a sign-extended 32-bit immediate. */
4452 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4453 return trunc_int_for_mode (val, SImode) == val;
4457 /* For certain code models, the symbolic references are known to fit.
4458 in CM_SMALL_PIC model we know it fits if it is local to the shared
4459 library. Don't count TLS SYMBOL_REFs here, since they should fit
4460 only if inside of UNSPEC handled below. */
4462 /* TLS symbols are not constant. */
4463 if (tls_symbolic_operand (value, Pmode))
4465 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4467 /* For certain code models, the code is near as well. */
4469 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4470 || ix86_cmodel == CM_KERNEL);
4472 /* We also may accept the offsetted memory references in certain special
4475 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4476 switch (XINT (XEXP (value, 0), 1))
4478 case UNSPEC_GOTPCREL:
4480 case UNSPEC_GOTNTPOFF:
4486 if (GET_CODE (XEXP (value, 0)) == PLUS)
4488 rtx op1 = XEXP (XEXP (value, 0), 0);
4489 rtx op2 = XEXP (XEXP (value, 0), 1);
4490 HOST_WIDE_INT offset;
4492 if (ix86_cmodel == CM_LARGE)
4494 if (GET_CODE (op2) != CONST_INT)
4496 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4497 switch (GET_CODE (op1))
4500 /* For CM_SMALL assume that latest object is 16MB before
4501 end of 31bits boundary. We may also accept pretty
4502 large negative constants knowing that all objects are
4503 in the positive half of address space. */
4504 if (ix86_cmodel == CM_SMALL
4505 && offset < 16*1024*1024
4506 && trunc_int_for_mode (offset, SImode) == offset)
4508 /* For CM_KERNEL we know that all objects reside in the
4509 negative half of 32bits address space. We may not
4510 accept negative offsets, since they may be just off
4511 and we may accept pretty large positive ones. */
4512 if (ix86_cmodel == CM_KERNEL
4514 && trunc_int_for_mode (offset, SImode) == offset)
4518 /* These conditions are similar to SYMBOL_REF ones, just the
4519 constraints for code models differ. */
4520 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4521 && offset < 16*1024*1024
4522 && trunc_int_for_mode (offset, SImode) == offset)
4524 if (ix86_cmodel == CM_KERNEL
4526 && trunc_int_for_mode (offset, SImode) == offset)
4530 switch (XINT (op1, 1))
4535 && trunc_int_for_mode (offset, SImode) == offset)
4549 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
4551 x86_64_zero_extended_value (rtx value)
4553 switch (GET_CODE (value))
/* On a 32-bit host, a CONST_DOUBLE with zero high part represents a
   nonnegative 64-bit integer fitting in 32 bits. */
4556 if (HOST_BITS_PER_WIDE_INT == 32)
4557 return (GET_MODE (value) == VOIDmode
4558 && !CONST_DOUBLE_HIGH (value));
4562 if (HOST_BITS_PER_WIDE_INT == 32)
4563 return INTVAL (value) >= 0;
/* 64-bit host: all bits above the low 32 must be clear. */
4565 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4568 /* For certain code models, the symbolic references are known to fit. */
4570 /* TLS symbols are not constant. */
4571 if (tls_symbolic_operand (value, Pmode))
4573 return ix86_cmodel == CM_SMALL;
4575 /* For certain code models, the code is near as well. */
4577 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4579 /* We also may accept the offsetted memory references in certain special
4582 if (GET_CODE (XEXP (value, 0)) == PLUS)
4584 rtx op1 = XEXP (XEXP (value, 0), 0);
4585 rtx op2 = XEXP (XEXP (value, 0), 1);
4587 if (ix86_cmodel == CM_LARGE)
4589 switch (GET_CODE (op1))
4593 /* For small code model we may accept pretty large positive
4594 offsets, since one bit is available for free. Negative
4595 offsets are limited by the size of NULL pointer area
4596 specified by the ABI. */
4597 if (ix86_cmodel == CM_SMALL
4598 && GET_CODE (op2) == CONST_INT
4599 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4600 && (trunc_int_for_mode (INTVAL (op2), SImode)
4603 /* ??? For the kernel, we may accept adjustment of
4604 -0x10000000, since we know that it will just convert
4605 negative address space to positive, but perhaps this
4606 is not worthwhile. */
4609 /* These conditions are similar to SYMBOL_REF ones, just the
4610 constraints for code models differ. */
4611 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4612 && GET_CODE (op2) == CONST_INT
4613 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4614 && (trunc_int_for_mode (INTVAL (op2), SImode)
4628 /* Value should be nonzero if functions must have frame pointers.
4629 Zero means the frame pointer need not be set up (and parms may
4630 be accessed via the stack pointer) in functions that seem suitable.
4631 NOTE(review): interior lines (return type, braces, return statements)
4632 are elided in this excerpt; the prefixed line numbers are non-contiguous. */
4633 ix86_frame_pointer_required (void)
4635 /* If we accessed previous frames, then the generated code expects
4636 to be able to access the saved ebp value in our frame. */
4637 if (cfun->machine->accesses_prev_frame)
4640 /* Several x86 os'es need a frame pointer for other reasons,
4641 usually pertaining to setjmp. */
4642 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4645 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4646 the frame pointer by default. Turn it back on now if we've not
4647 got a leaf function. */
4648 if (TARGET_OMIT_LEAF_FRAME_POINTER
4649 && (!current_function_is_leaf))
4651 /* Presumably profiled functions also require a frame pointer --
   the taken branch is elided here; confirm against the full source. */
4652 if (current_function_profile)
4658 /* Record that the current function accesses previous call frames.
4659 Setting this flag makes ix86_frame_pointer_required force a frame
4660 pointer, so the saved ebp chain stays walkable. */
4661 ix86_setup_frame_addresses (void)
4663 cfun->machine->accesses_prev_frame = 1;
4666 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4667 # define USE_HIDDEN_LINKONCE 1
4669 # define USE_HIDDEN_LINKONCE 0
4671 /* Bitmask of hard register numbers for which a pc-thunk has been
   emitted via output_set_got; bit REGNO is tested in ix86_file_end
   to decide which thunk bodies must be output. */
4672 static int pic_labels_used;
4674 /* Fills in the label name that should be used for a pc thunk for
4675 the given register. NAME must have room for at least 32 bytes.
4676 With hidden linkonce support the well-known __i686.get_pc_thunk.REG
4677 name is used so identical thunks merge across objects. */
4678 get_pc_thunk_name (char name[32], unsigned int regno)
4680 if (USE_HIDDEN_LINKONCE)
4681 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4683 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4687 /* This function generates code for -fpic that loads %ebx with
4688 the return address of the caller and then returns.
4689 NOTE(review): it emits one thunk per integer register whose bit is
4690 set in pic_labels_used; interior lines are elided in this excerpt. */
4691 ix86_file_end (void)
4696 for (regno = 0; regno < 8; ++regno)
4699 /* Skip registers for which output_set_got never requested a thunk. */
4700 if (! ((pic_labels_used >> regno) & 1))
4703 get_pc_thunk_name (name, regno);
4705 if (USE_HIDDEN_LINKONCE)
4709 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4711 TREE_PUBLIC (decl) = 1;
4712 TREE_STATIC (decl) = 1;
4713 DECL_ONE_ONLY (decl) = 1;
4715 (*targetm.asm_out.unique_section) (decl, 0);
4716 named_section (decl, NULL, 0);
4718 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4719 fputs ("\t.hidden\t", asm_out_file);
4720 assemble_name (asm_out_file, name);
4721 fputc ('\n', asm_out_file);
4722 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4727 ASM_OUTPUT_LABEL (asm_out_file, name);
4729 /* Thunk body: mov (%esp), %reg; ret -- copies the return address
   (already pushed by the caller's call) into the target register. */
4730 xops[0] = gen_rtx_REG (SImode, regno);
4731 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4732 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4733 output_asm_insn ("ret", xops);
4736 if (NEED_INDICATE_EXEC_STACK)
4737 file_end_indicate_exec_stack ();
4740 /* Emit code for the SET_GOT patterns: load DEST with the address of
4741 the GOT (_GLOBAL_OFFSET_TABLE_). Without deep branch prediction a
4742 call/pop pair is used; otherwise a pc-thunk call (see ix86_file_end). */
4743 output_set_got (rtx dest)
4748 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4750 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4752 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4755 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4757 output_asm_insn ("call\t%a2", xops);
4760 /* Output the "canonical" label name ("Lxx$pb") here too. This
4761 is what will be referred to by the Mach-O PIC subsystem. */
4762 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4764 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4765 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4768 output_asm_insn ("pop{l}\t%0", xops);
4772 /* Deep-branch-prediction path: record the thunk as used so
   ix86_file_end emits its body, then call it. */
4773 get_pc_thunk_name (name, REGNO (dest));
4774 pic_labels_used |= 1 << REGNO (dest);
4776 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4777 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4778 output_asm_insn ("call\t%X2", xops);
4780 /* Adjust DEST from the pc just loaded to the GOT base. */
4781 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4782 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4783 else if (!TARGET_MACHO)
4784 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4789 /* Generate a "push" pattern for input ARG: a SET storing ARG through
4790 a pre-decremented stack pointer. NOTE(review): the function header
4791 line is elided in this excerpt. */
4794 return gen_rtx_SET (VOIDmode,
4796 gen_rtx_PRE_DEC (Pmode,
4797 stack_pointer_rtx)),
4801 /* Return >= 0 if there is an unused call-clobbered register available
4802 for the entire function; INVALID_REGNUM otherwise. Used to avoid
4803 saving/reserving %ebx for the PIC pointer in leaf functions. */
4805 ix86_select_alt_pic_regnum (void)
4807 if (current_function_is_leaf && !current_function_profile)
4809 /* Scan %ecx, %edx, %eax (regnos 2..0) for one never used. */
4810 for (i = 2; i >= 0; --i)
4811 if (!regs_ever_live[i])
4815 return INVALID_REGNUM;
4818 /* Return 1 if we need to save REGNO in the prologue. MAYBE_EH_RETURN
4819 additionally counts the EH return data registers as saved. */
4820 ix86_save_reg (unsigned int regno, int maybe_eh_return)
4821 /* The PIC register must be saved if it is live, unless an alternate
   call-clobbered register can hold it (leaf functions). */
4822 if (pic_offset_table_rtx
4823 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4824 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4825 || current_function_profile
4826 || current_function_calls_eh_return
4827 || current_function_uses_const_pool))
4829 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4834 if (current_function_calls_eh_return && maybe_eh_return)
4839 unsigned test = EH_RETURN_DATA_REGNO (i);
4840 if (test == INVALID_REGNUM)
4846 /* Ordinary case: live, callee-saved, not fixed, and not the hard
   frame pointer when that is set up separately. */
4847 return (regs_ever_live[regno]
4848 && !call_used_regs[regno]
4849 && !fixed_regs[regno]
4850 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4853 /* Return number of registers to be saved on the stack
4854 (count of regnos for which ix86_save_reg is true). */
4856 ix86_nsaved_regs (void)
4861 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4862 if (ix86_save_reg (regno, true))
4867 /* Return the offset between two registers, one to be eliminated, and the other
4868 its replacement, at the start of a routine. Offsets are taken from
4869 the frame layout computed by ix86_compute_frame_layout. */
4871 ix86_initial_elimination_offset (int from, int to)
4873 struct ix86_frame frame;
4874 ix86_compute_frame_layout (&frame);
4876 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4877 return frame.hard_frame_pointer_offset;
4878 else if (from == FRAME_POINTER_REGNUM
4879 && to == HARD_FRAME_POINTER_REGNUM)
4880 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4882 /* Remaining eliminations must target the stack pointer. */
4883 if (to != STACK_POINTER_REGNUM)
4885 else if (from == ARG_POINTER_REGNUM)
4886 return frame.stack_pointer_offset;
4887 else if (from != FRAME_POINTER_REGNUM)
4890 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4894 /* Fill structure ix86_frame about frame of currently computed function.
4895 Computes register save counts, paddings, red-zone usage and the
4896 offsets used by the prologue/epilogue and register elimination. */
4897 ix86_compute_frame_layout (struct ix86_frame *frame)
4899 HOST_WIDE_INT total_size;
4900 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4901 HOST_WIDE_INT offset;
4902 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4903 HOST_WIDE_INT size = get_frame_size ();
4905 frame->nregs = ix86_nsaved_regs ();
4908 /* During reload iteration the amount of registers saved can change.
4909 Recompute the value as needed. Do not recompute when amount of registers
4910 didn't change as reload does multiple calls to the function and does not
4911 expect the decision to change within single iteration. */
4913 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4915 int count = frame->nregs;
4917 cfun->machine->use_fast_prologue_epilogue_nregs = count;
4918 /* The fast prologue uses move instead of push to save registers. This
4919 is significantly longer, but also executes faster as modern hardware
4920 can execute the moves in parallel, but can't do that for push/pop.
4922 Be careful about choosing what prologue to emit: When function takes
4923 many instructions to execute we may use slow version as well as in
4924 case function is known to be outside hot spot (this is known with
4925 feedback only). Weight the size of function by number of registers
4926 to save as it is cheap to use one or two push instructions but very
4927 slow to use many of them. */
4929 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4930 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4931 || (flag_branch_probabilities
4932 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4933 cfun->machine->use_fast_prologue_epilogue = false;
4935 cfun->machine->use_fast_prologue_epilogue
4936 = !expensive_function_p (count);
4938 if (TARGET_PROLOGUE_USING_MOVE
4939 && cfun->machine->use_fast_prologue_epilogue)
4940 frame->save_regs_using_mov = true;
4942 frame->save_regs_using_mov = false;
4945 /* Skip return address and saved base pointer. */
4946 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4948 frame->hard_frame_pointer_offset = offset;
4950 /* Do some sanity checking of stack_alignment_needed and
4951 preferred_alignment, since i386 port is the only using those features
4952 that may break easily.
4953 NOTE(review): the consequents of these checks (presumably aborts)
   are elided in this excerpt. */
4954 if (size && !stack_alignment_needed)
4956 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4958 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4960 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4963 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4964 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4966 /* Register save area */
4967 offset += frame->nregs * UNITS_PER_WORD;
4969 /* Va-arg save area (only on targets saving vararg registers). */
4970 if (ix86_save_varrargs_registers)
4972 offset += X86_64_VARARGS_SIZE;
4973 frame->va_arg_size = X86_64_VARARGS_SIZE;
4976 frame->va_arg_size = 0;
4978 /* Align start of frame for local function. */
4979 frame->padding1 = ((offset + stack_alignment_needed - 1)
4980 & -stack_alignment_needed) - offset;
4982 offset += frame->padding1;
4984 /* Frame pointer points here. */
4985 frame->frame_pointer_offset = offset;
4989 /* Add outgoing arguments area. Can be skipped if we eliminated
4990 all the function calls as dead code. */
4991 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4993 offset += current_function_outgoing_args_size;
4994 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4997 frame->outgoing_arguments_size = 0;
4999 /* Align stack boundary. Only needed if we're calling another function
5001 if (!current_function_is_leaf || current_function_calls_alloca)
5002 frame->padding2 = ((offset + preferred_alignment - 1)
5003 & -preferred_alignment) - offset;
5005 frame->padding2 = 0;
5007 offset += frame->padding2;
5009 /* We've reached end of stack frame. */
5010 frame->stack_pointer_offset = offset;
5012 /* Size prologue needs to allocate. */
5013 frame->to_allocate =
5014 (size + frame->padding1 + frame->padding2
5015 + frame->outgoing_arguments_size + frame->va_arg_size);
5016 /* Tiny or huge frames disable the mov-based save strategy. */
5017 if ((!frame->to_allocate && frame->nregs <= 1)
5018 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5019 frame->save_regs_using_mov = false;
5020 /* Red zone: leaf functions may use the area below %rsp without
   adjusting it; fold the allocation into the red zone when possible. */
5021 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5022 && current_function_is_leaf)
5024 frame->red_zone_size = frame->to_allocate;
5025 if (frame->save_regs_using_mov)
5026 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5027 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5028 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5031 frame->red_zone_size = 0;
5032 frame->to_allocate -= frame->red_zone_size;
5033 frame->stack_pointer_offset -= frame->red_zone_size;
5034 /* Debug dump of the computed layout (condition elided in excerpt). */
5035 fprintf (stderr, "nregs: %i\n", frame->nregs);
5036 fprintf (stderr, "size: %i\n", size);
5037 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5038 fprintf (stderr, "padding1: %i\n", frame->padding1);
5039 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5040 fprintf (stderr, "padding2: %i\n", frame->padding2);
5041 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5042 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5043 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5044 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5045 frame->hard_frame_pointer_offset);
5046 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5050 /* Emit code to save registers in the prologue using push instructions.
5051 Each push is marked frame-related for DWARF unwind info. */
5053 ix86_emit_save_regs (void)
5058 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5059 if (ix86_save_reg (regno, true))
5061 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5062 RTX_FRAME_RELATED_P (insn) = 1;
5066 /* Emit code to save registers using MOV insns. First register
5067 is saved to POINTER + OFFSET; subsequent ones at successive
5068 word offsets. Each store is marked frame-related for unwind info. */
5069 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5074 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5075 if (ix86_save_reg (regno, true))
5077 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5079 gen_rtx_REG (Pmode, regno));
5080 RTX_FRAME_RELATED_P (insn) = 1;
5081 offset += UNITS_PER_WORD;
5085 /* Expand prologue or epilogue stack adjustment.
5086 The pattern exists to put a dependency on all ebp-based memory accesses.
5087 STYLE should be negative if instructions should be marked as frame related,
5088 zero if %r11 register is live and cannot be freely used and positive
5089 otherwise. */
5092 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5097 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5098 else if (x86_64_immediate_operand (offset, DImode))
5099 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5102 /* Offset too large for an immediate: materialize it in r11 first.
5103 r11 is used by indirect sibcall return as well, set before the
5104 epilogue and used after the epilogue. ATM indirect sibcall
5105 shouldn't be used together with huge frame sizes in one
5106 function because of the frame_size check in sibcall.c. */
5109 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5110 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5112 RTX_FRAME_RELATED_P (insn) = 1;
5113 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5117 RTX_FRAME_RELATED_P (insn) = 1;
5120 /* Expand the prologue into a bunch of separate insns: save the frame
5121 pointer, save call-saved registers (push or mov), allocate the frame,
5122 and set up the PIC register when needed. */
5123 ix86_expand_prologue (void)
5127 struct ix86_frame frame;
5128 HOST_WIDE_INT allocate;
5130 ix86_compute_frame_layout (&frame);
5132 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5133 slower on all targets. Also sdb doesn't like it. */
5135 if (frame_pointer_needed)
5137 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5138 RTX_FRAME_RELATED_P (insn) = 1;
5140 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5141 RTX_FRAME_RELATED_P (insn) = 1;
5144 allocate = frame.to_allocate;
5146 if (!frame.save_regs_using_mov)
5147 ix86_emit_save_regs ();
5148 /* Mov-based saves land inside the allocated area instead of pushes. */
5149 allocate += frame.nregs * UNITS_PER_WORD;
5151 /* When using red zone we may start register saving before allocating
5152 the stack frame saving one cycle of the prologue. */
5153 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5154 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5155 : stack_pointer_rtx,
5156 -frame.nregs * UNITS_PER_WORD);
5160 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5161 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5162 GEN_INT (-allocate), -1);
5164 /* Large allocation with stack probing: go through the allocator
5165 worker, preserving %eax if it carries an incoming value.
5166 Only valid for Win32. */
5166 rtx eax = gen_rtx_REG (SImode, 0);
5167 bool eax_live = ix86_eax_live_at_start_p ();
5174 emit_insn (gen_push (eax));
5178 insn = emit_move_insn (eax, GEN_INT (allocate));
5179 RTX_FRAME_RELATED_P (insn) = 1;
5181 insn = emit_insn (gen_allocate_stack_worker (eax));
5182 RTX_FRAME_RELATED_P (insn) = 1;
5186 rtx t = plus_constant (stack_pointer_rtx, allocate);
5187 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5191 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5193 if (!frame_pointer_needed || !frame.to_allocate)
5194 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5196 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5197 -frame.nregs * UNITS_PER_WORD);
5200 pic_reg_used = false;
5201 if (pic_offset_table_rtx
5202 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5203 || current_function_profile))
5205 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5207 if (alt_pic_reg_used != INVALID_REGNUM)
5208 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5210 pic_reg_used = true;
5215 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5217 /* Even with accurate pre-reload life analysis, we can wind up
5218 deleting all references to the pic register after reload.
5219 Consider if cross-jumping unifies two sides of a branch
5220 controlled by a comparison vs the only read from a global.
5221 In which case, allow the set_got to be deleted, though we're
5222 too late to do anything about the ebx save in the prologue. */
5223 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5226 /* Prevent function calls from being scheduled before the call to mcount.
5227 In the pic_reg_used case, make sure that the got load isn't deleted. */
5228 if (current_function_profile)
5229 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5232 /* Emit code to restore saved registers using MOV insns. First register
5233 is restored from POINTER + OFFSET; MAYBE_EH_RETURN is forwarded to
5234 ix86_save_reg to include the EH return data registers. */
5235 ix86_emit_restore_regs_using_mov (rtx pointer, int offset, int maybe_eh_return)
5239 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5240 if (ix86_save_reg (regno, maybe_eh_return))
5242 emit_move_insn (gen_rtx_REG (Pmode, regno),
5243 adjust_address (gen_rtx_MEM (Pmode, pointer),
5245 offset += UNITS_PER_WORD;
5249 /* Restore function stack, frame, and registers. STYLE distinguishes
5250 normal return, eh_return (style == 2) and sibcall epilogues. */
5252 ix86_expand_epilogue (int style)
5255 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5256 struct ix86_frame frame;
5257 HOST_WIDE_INT offset;
5259 ix86_compute_frame_layout (&frame);
5261 /* Calculate start of saved registers relative to ebp. Special care
5262 must be taken for the normal return case of a function using
5263 eh_return: the eax and edx registers are marked as saved, but not
5264 restored along this path. */
5265 offset = frame.nregs;
5266 if (current_function_calls_eh_return && style != 2)
5268 offset *= -UNITS_PER_WORD;
5270 /* If we're only restoring one register and sp is not valid then
5271 using a move instruction to restore the register since it's
5272 less work than reloading sp and popping the register.
5274 The default code result in stack adjustment using add/lea instruction,
5275 while this code results in LEAVE instruction (or discrete equivalent),
5276 so it is profitable in some other cases as well. Especially when there
5277 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5278 and there is exactly one register to pop. This heuristic may need some
5279 tuning in future. */
5280 if ((!sp_valid && frame.nregs <= 1)
5281 || (TARGET_EPILOGUE_USING_MOVE
5282 && cfun->machine->use_fast_prologue_epilogue
5283 && (frame.nregs > 1 || frame.to_allocate))
5284 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5285 || (frame_pointer_needed && TARGET_USE_LEAVE
5286 && cfun->machine->use_fast_prologue_epilogue
5287 && frame.nregs == 1)
5288 || current_function_calls_eh_return)
5290 /* Restore registers. We can use ebp or esp to address the memory
5291 locations. If both are available, default to ebp, since offsets
5292 are known to be small. Only exception is esp pointing directly to the
5293 end of block of saved registers, where we may simplify addressing
5296 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5297 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5298 frame.to_allocate, style == 2)
5300 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5301 offset, style == 2);
5303 /* eh_return epilogues need %ecx added to the stack pointer. */
5306 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5308 if (frame_pointer_needed)
5310 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5311 tmp = plus_constant (tmp, UNITS_PER_WORD);
5312 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5314 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5315 emit_move_insn (hard_frame_pointer_rtx, tmp);
5317 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5322 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5323 tmp = plus_constant (tmp, (frame.to_allocate
5324 + frame.nregs * UNITS_PER_WORD));
5325 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5328 else if (!frame_pointer_needed)
5329 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5330 GEN_INT (frame.to_allocate
5331 + frame.nregs * UNITS_PER_WORD),
5333 /* If not an i386, mov & pop is faster than "leave". */
5334 else if (TARGET_USE_LEAVE || optimize_size
5335 || !cfun->machine->use_fast_prologue_epilogue)
5336 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5339 pro_epilogue_adjust_stack (stack_pointer_rtx,
5340 hard_frame_pointer_rtx,
5343 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5345 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5349 /* Pop-based path (registers were saved with pushes).
5350 First step is to deallocate the stack frame so that we can
5351 pop the registers. */
5354 if (!frame_pointer_needed)
5356 pro_epilogue_adjust_stack (stack_pointer_rtx,
5357 hard_frame_pointer_rtx,
5358 GEN_INT (offset), style);
5360 else if (frame.to_allocate)
5361 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5362 GEN_INT (frame.to_allocate), style);
5364 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5365 if (ix86_save_reg (regno, false))
5368 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5370 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5372 if (frame_pointer_needed)
5374 /* Leave results in shorter dependency chains on CPUs that are
5375 able to grok it fast. */
5376 if (TARGET_USE_LEAVE)
5377 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5378 else if (TARGET_64BIT)
5379 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5381 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5385 /* Sibcall epilogues don't want a return instruction. */
5389 if (current_function_pops_args && current_function_args_size)
5391 rtx popc = GEN_INT (current_function_pops_args);
5393 /* i386 can only pop 64K bytes. If asked to pop more, pop
5394 return address, do explicit add, and jump indirectly to the
5397 if (current_function_pops_args >= 65536)
5399 rtx ecx = gen_rtx_REG (SImode, 2);
5401 /* There is no "pascal" calling convention in 64bit ABI. */
5405 emit_insn (gen_popsi1 (ecx));
5406 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5407 emit_jump_insn (gen_return_indirect_internal (ecx));
5410 emit_jump_insn (gen_return_pop_internal (popc));
5413 emit_jump_insn (gen_return_internal ());
5416 /* Reset from the function's potential modifications: undo any
5417 alternate-PIC-register renaming done by ix86_expand_prologue so the
5418 PIC rtx again names the real PIC register for the next function. */
5419 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5420 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5422 if (pic_offset_table_rtx)
5423 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5426 /* Extract the parts of an RTL expression that is a valid memory address
5427 for an instruction. Return 0 if the structure of the address is
5428 grossly off. Return -1 if the address contains ASHIFT, so it is not
5429 strictly valid, but still used for computing length of lea instruction.
5430 Fills OUT with base, index, scale, displacement and segment. */
5432 ix86_decompose_address (register rtx addr, struct ix86_address *out)
5434 rtx base = NULL_RTX;
5435 rtx index = NULL_RTX;
5436 rtx disp = NULL_RTX;
5437 HOST_WIDE_INT scale = 1;
5438 rtx scale_rtx = NULL_RTX;
5440 enum ix86_address_seg seg = SEG_DEFAULT;
5442 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
5444 else if (GET_CODE (addr) == PLUS)
5453 /* Flatten the nested PLUS chain into an addends array. */
5454 addends[n++] = XEXP (op, 1);
5457 while (GET_CODE (op) == PLUS);
5462 for (i = n; i >= 0; --i)
5465 switch (GET_CODE (op))
5470 index = XEXP (op, 0);
5471 scale_rtx = XEXP (op, 1);
5474 /* Recognize %fs/%gs-relative TLS references. */
5475 if (XINT (op, 1) == UNSPEC_TP
5476 && TARGET_TLS_DIRECT_SEG_REFS
5477 && seg == SEG_DEFAULT)
5478 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5507 else if (GET_CODE (addr) == MULT)
5509 index = XEXP (addr, 0); /* index*scale */
5510 scale_rtx = XEXP (addr, 1);
5512 else if (GET_CODE (addr) == ASHIFT)
5516 /* We're called for lea too, which implements ashift on occasion. */
5517 index = XEXP (addr, 0);
5518 tmp = XEXP (addr, 1);
5519 if (GET_CODE (tmp) != CONST_INT)
5521 scale = INTVAL (tmp);
5522 if ((unsigned HOST_WIDE_INT) scale > 3)
5528 disp = addr; /* displacement */
5530 /* Extract the integral value of scale. */
5533 if (GET_CODE (scale_rtx) != CONST_INT)
5535 scale = INTVAL (scale_rtx);
5538 /* Allow arg pointer and stack pointer as index if there is not scaling. */
5539 if (base && index && scale == 1
5540 && (index == arg_pointer_rtx
5541 || index == frame_pointer_rtx
5542 || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
5549 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5550 if ((base == hard_frame_pointer_rtx
5551 || base == frame_pointer_rtx
5552 || base == arg_pointer_rtx) && !disp)
5555 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5556 Avoid this by transforming to [%esi+0]. */
5557 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5558 && base && !index && !disp
5560 && REGNO_REG_CLASS (REGNO (base)) == SIREG
5563 /* Special case: encode reg+reg instead of reg*2. */
5564 if (!base && index && scale && scale == 2)
5565 base = index, scale = 1;
5567 /* Special case: scaling cannot be encoded without base or displacement. */
5568 if (!base && !disp && index && scale != 1)
5580 /* Return cost of the memory address x.
5581 For i386, it is better to use a complex address than let gcc copy
5582 the address into a reg and make a new pseudo. But not if the address
5583 requires two regs - that would mean more pseudos with longer
5584 lifetimes. */
5586 ix86_address_cost (rtx x)
5588 struct ix86_address parts;
5591 if (!ix86_decompose_address (x, &parts))
5593 /* Look through SUBREGs so register checks below see the inner reg. */
5594 if (parts.base && GET_CODE (parts.base) == SUBREG)
5595 parts.base = SUBREG_REG (parts.base);
5596 if (parts.index && GET_CODE (parts.index) == SUBREG)
5597 parts.index = SUBREG_REG (parts.index);
5599 /* More complex memory references are better. */
5600 if (parts.disp && parts.disp != const0_rtx)
5602 if (parts.seg != SEG_DEFAULT)
5605 /* Attempt to minimize number of registers in the address. */
5607 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5609 && (!REG_P (parts.index)
5610 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5614 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5616 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5617 && parts.base != parts.index)
5620 /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
5621 since its predecode logic can't detect the length of instructions
5622 and it degenerates to vector decoded. Increase cost of such
5623 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5624 to split such addresses or even refuse such addresses at all.
5626 Following addressing modes are affected:
5631 The first and last case may be avoidable by explicitly coding the zero in
5632 memory address, but I don't have AMD-K6 machine handy to check this
5633 theory. */
5636 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5637 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5638 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5644 /* If X is a machine specific address (i.e. a symbol or label being
5645 referenced as a displacement from the GOT implemented using an
5646 UNSPEC), then return the base term. Otherwise return X. */
5649 ix86_find_base_term (rtx x)
5655 if (GET_CODE (x) != CONST)
5657 /* Strip an outer constant offset before looking for the unspec. */
5658 if (GET_CODE (term) == PLUS
5659 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5660 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5661 term = XEXP (term, 0);
5662 if (GET_CODE (term) != UNSPEC
5663 || XINT (term, 1) != UNSPEC_GOTPCREL)
5666 term = XVECEXP (term, 0, 0);
5668 if (GET_CODE (term) != SYMBOL_REF
5669 && GET_CODE (term) != LABEL_REF)
5674 /* Non-64-bit path: delegitimize instead of matching GOTPCREL. */
5675 term = ix86_delegitimize_address (x);
5677 if (GET_CODE (term) != SYMBOL_REF
5678 && GET_CODE (term) != LABEL_REF)
5684 /* Determine if a given RTX is a valid constant. We already know this
5685 satisfies CONSTANT_P. TLS symbols (and offsets of them) are never
5686 valid constants; only certain UNSPECs inside a CONST are accepted. */
5688 legitimate_constant_p (rtx x)
5692 switch (GET_CODE (x))
5695 /* TLS symbols are not constant. */
5696 if (tls_symbolic_operand (x, Pmode))
5701 inner = XEXP (x, 0);
5703 /* Offsets of TLS symbols are never valid.
5704 Discourage CSE from creating them. */
5705 if (GET_CODE (inner) == PLUS
5706 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5709 if (GET_CODE (inner) == PLUS)
5711 if (GET_CODE (XEXP (inner, 1)) != CONST_INT)
5713 inner = XEXP (inner, 0);
5716 /* Only some unspecs are valid as "constants". */
5717 if (GET_CODE (inner) == UNSPEC)
5718 switch (XINT (inner, 1))
5722 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5724 return local_dynamic_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5734 /* Otherwise we handle everything else in the move patterns. */
5738 /* Determine if it's legal to put X into the constant pool. This
5739 is not possible for the address of thread-local symbols, which
5740 is checked above (see legitimate_constant_p). */
5743 ix86_cannot_force_const_mem (rtx x)
5745 return !legitimate_constant_p (x);
5748 /* Determine if a given RTX is a valid constant address
5749 (a constant that is also a strictly legitimate address). */
5751 constant_address_p (rtx x)
5753 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5756 /* Nonzero if the constant value X is a legitimate general operand
5757 when generating PIC code. It is given that flag_pic is on and
5758 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5761 legitimate_pic_operand_p (rtx x)
5765 switch (GET_CODE (x))
5768 inner = XEXP (x, 0);
5770 /* Only some unspecs are valid as "constants". */
5771 if (GET_CODE (inner) == UNSPEC)
5772 switch (XINT (inner, 1))
5775 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5782 /* Symbols and labels fall through to the displacement check. */
5783 return legitimate_pic_address_disp_p (x);
5790 /* Determine if a given CONST RTX is a valid memory displacement
5791 in PIC mode. Accepts local symbols/labels (with bounded offsets
5792 on 64-bit) and the GOT/TLS unspec wrappers listed below. */
5794 legitimate_pic_address_disp_p (register rtx disp)
5798 /* In 64bit mode we can allow direct addresses of symbols and labels
5799 when they are not dynamic symbols. */
5802 /* TLS references should always be enclosed in UNSPEC. */
5803 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5805 if (GET_CODE (disp) == SYMBOL_REF
5806 && ix86_cmodel == CM_SMALL_PIC
5807 && SYMBOL_REF_LOCAL_P (disp))
5809 if (GET_CODE (disp) == LABEL_REF)
5811 if (GET_CODE (disp) == CONST
5812 && GET_CODE (XEXP (disp, 0)) == PLUS)
5814 rtx op0 = XEXP (XEXP (disp, 0), 0);
5815 rtx op1 = XEXP (XEXP (disp, 0), 1);
5817 /* TLS references should always be enclosed in UNSPEC. */
5818 if (tls_symbolic_operand (op0, GET_MODE (op0)))
5819 /* Local symbol plus an offset within +/-16MB is acceptable. */
5820 if (((GET_CODE (op0) == SYMBOL_REF
5821 && ix86_cmodel == CM_SMALL_PIC
5822 && SYMBOL_REF_LOCAL_P (op0))
5823 || GET_CODE (op0) == LABEL_REF)
5824 && GET_CODE (op1) == CONST_INT
5825 && INTVAL (op1) < 16*1024*1024
5826 && INTVAL (op1) >= -16*1024*1024)
5830 if (GET_CODE (disp) != CONST)
5832 disp = XEXP (disp, 0);
5836 /* It is unsafe to allow PLUS expressions. This limit allowed distance
5837 of GOT tables. We should not need these anyway. */
5838 if (GET_CODE (disp) != UNSPEC
5839 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5842 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5843 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5849 if (GET_CODE (disp) == PLUS)
5851 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5853 disp = XEXP (disp, 0);
5857 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5858 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5860 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5861 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5862 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5864 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5865 if (! strcmp (sym_name, "<pic base>"))
5870 if (GET_CODE (disp) != UNSPEC)
5873 switch (XINT (disp, 1))
5878 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5880 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
5881 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
5882 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5884 case UNSPEC_GOTTPOFF:
5885 case UNSPEC_GOTNTPOFF:
5886 case UNSPEC_INDNTPOFF:
5889 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5891 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5893 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5899 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5900 memory address for an instruction. The MODE argument is the machine mode
5901 for the MEM expression that wants to use this address.
5903 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
5904 convert common non-canonical forms to canonical form so that they will
/* Decide whether ADDR is a valid x86 address for MODE; STRICT selects the
   strict (post-reload, hard-reg-only) register checks.  On failure REASON /
   REASON_RTX record diagnostics printed under TARGET_DEBUG_ADDR.
   NOTE(review): this extract is missing intermediate source lines (braces,
   returns, some conditions); the left-hand numbers are the original file's
   line numbers.  */
5908 legitimate_address_p (enum machine_mode mode, register rtx addr, int strict)
5910   struct ix86_address parts;
5911   rtx base, index, disp;
5912   HOST_WIDE_INT scale;
5913   const char *reason = NULL;
5914   rtx reason_rtx = NULL_RTX;
5916   if (TARGET_DEBUG_ADDR)
5919 	       "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5920 	       GET_MODE_NAME (mode), strict);
/* First split ADDR into base + index*scale + disp; reject outright if the
   decomposition itself fails.  */
5924   if (ix86_decompose_address (addr, &parts) <= 0)
5926       reason = "decomposition failed";
5931   index = parts.index;
5933   scale = parts.scale;
5935   /* Validate base register.
5937      Don't allow SUBREG's here, it can lead to spill failures when the base
5938      is one word out of a two word structure, which is represented internally
5946   if (GET_CODE (base) == SUBREG)
5947 	reg = SUBREG_REG (base);
5951       if (GET_CODE (reg) != REG)
5953 	  reason = "base is not a register";
5957       if (GET_MODE (base) != Pmode)
5959 	  reason = "base is not in Pmode";
/* STRICT mode requires a hard register valid as a base; non-strict also
   accepts pseudos that may still be allocated later.  */
5963       if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5964 	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5966 	  reason = "base is not valid";
5971   /* Validate index register.
5973      Don't allow SUBREG's here, it can lead to spill failures when the index
5974      is one word out of a two word structure, which is represented internally
5982   if (GET_CODE (index) == SUBREG)
5983 	reg = SUBREG_REG (index);
5987       if (GET_CODE (reg) != REG)
5989 	  reason = "index is not a register";
5993       if (GET_MODE (index) != Pmode)
5995 	  reason = "index is not in Pmode";
5999       if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6000 	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6002 	  reason = "index is not valid";
6007   /* Validate scale factor.  */
6010       reason_rtx = GEN_INT (scale);
6013 	  reason = "scale without index";
/* x86 hardware only supports scale factors 1, 2, 4 and 8 (1 presumably
   handled by an elided branch above).  */
6017       if (scale != 2 && scale != 4 && scale != 8)
6019 	  reason = "scale is not a valid multiplier";
6024   /* Validate displacement.  */
/* A (const (unspec ...)) displacement is a PIC/TLS relocation; only the
   unspec kinds listed below are addressable.  */
6029       if (GET_CODE (disp) == CONST
6030 	  && GET_CODE (XEXP (disp, 0)) == UNSPEC
6031 	switch (XINT (XEXP (disp, 0), 1))
6035 	  case UNSPEC_GOTPCREL:
6038 	    goto is_legitimate_pic;
6040 	  case UNSPEC_GOTTPOFF:
6041 	  case UNSPEC_GOTNTPOFF:
6042 	  case UNSPEC_INDNTPOFF:
6048 	    reason = "invalid address unspec";
6052       else if (flag_pic && (SYMBOLIC_CONST (disp)
6054 			    && !machopic_operand_p (disp)
/* On x86-64, a symbolic displacement combined with base/index is only
   allowed in the foo@dtpoff(%rX) / foo@ntpoff(%rX) TLS forms.  */
6059 	  if (TARGET_64BIT && (index || base))
6061 	      /* foo@dtpoff(%rX) is ok.  */
6062 	      if (GET_CODE (disp) != CONST
6063 		  || GET_CODE (XEXP (disp, 0)) != PLUS
6064 		  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6065 		  || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6066 		  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6067 		      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6069 		  reason = "non-constant pic memory reference";
6073 	  else if (! legitimate_pic_address_disp_p (disp))
6075 	      reason = "displacement is an invalid pic construct";
6079 	  /* This code used to verify that a symbolic pic displacement
6080 	     includes the pic_offset_table_rtx register.
6082 	     While this is good idea, unfortunately these constructs may
6083 	     be created by "adds using lea" optimization for incorrect
6092 	     This code is nonsensical, but results in addressing
6093 	     GOT table with pic_offset_table_rtx base.  We can't
6094 	     just refuse it easily, since it gets matched by
6095 	     "addsi3" pattern, that later gets split to lea in the
6096 	     case output register differs from input.  While this
6097 	     can be handled by separate addsi pattern for this case
6098 	     that never results in lea, this seems to be easier and
6099 	     correct fix for crash to disable this test.  */
6101       else if (GET_CODE (disp) != LABEL_REF
6102 	       && GET_CODE (disp) != CONST_INT
6103 	       && (GET_CODE (disp) != CONST
6104 		   || !legitimate_constant_p (disp))
6105 	       && (GET_CODE (disp) != SYMBOL_REF
6106 		   || !legitimate_constant_p (disp)))
6108 	  reason = "displacement is not constant";
/* 64-bit displacements must fit in a sign-extended 32-bit immediate.  */
6111       else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
6113 	  reason = "displacement is out of range";
6118   /* Everything looks valid.  */
6119   if (TARGET_DEBUG_ADDR)
6120     fprintf (stderr, "Success.\n");
6124   if (TARGET_DEBUG_ADDR)
6126       fprintf (stderr, "Error: %s\n", reason);
6127       debug_rtx (reason_rtx);
6132 /* Return an unique alias set for the GOT. */
/* Return a unique alias set for GOT references, lazily created on first
   call (SET is initialized to -1; presumably an elided `if (set == -1)`
   guards the new_alias_set call — confirm against the full source).  */
6134 static HOST_WIDE_INT
6135 ix86_GOT_alias_set (void)
6137   static HOST_WIDE_INT set = -1;
6139     set = new_alias_set ();
6143 /* Return a legitimate reference for ORIG (an address) using the
6144 register REG. If REG is 0, a new pseudo is generated.
6146 There are two types of references that must be handled:
6148 1. Global data references must load the address from the GOT, via
6149 the PIC reg. An insn is emitted to do this load, and the reg is
6152 2. Static data references, constant pool addresses, and code labels
6153 compute the address as an offset from the GOT, whose base is in
6154 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6155 differentiate them from global data objects. The returned
6156 address is the PIC reg + an unspec constant.
6158 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6159 reg also appears in the address. */
/* Convert address ORIG into a PIC-legitimate form, loading into REG (or a
   fresh pseudo when REG is 0).  Local symbols become pic_base + @GOTOFF;
   global symbols become a load from the GOT (@GOT / @GOTPCREL); PLUS
   expressions are legitimized recursively.
   NOTE(review): intermediate lines are missing from this extract; the
   numbers at the left are original file line numbers.  */
6162 legitimize_pic_address (rtx orig, rtx reg)
6170 	reg = gen_reg_rtx (Pmode);
6171       /* Use the generic Mach-O PIC machinery.  */
6172       return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6175   if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6177   else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6179       /* This symbol may be referenced via a displacement from the PIC
6180 	 base address (@GOTOFF).  */
/* During reload, using the PIC register must be recorded by hand since no
   new liveness pass will run.  */
6182       if (reload_in_progress)
6183 	regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6184       if (GET_CODE (addr) == CONST)
6185 	addr = XEXP (addr, 0);
6186       if (GET_CODE (addr) == PLUS)
6188 	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6189 	  new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6192 	new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6193       new = gen_rtx_CONST (Pmode, new);
6194       new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6198 	  emit_move_insn (reg, new);
6202   else if (GET_CODE (addr) == SYMBOL_REF)
/* 64-bit: load the symbol's address RIP-relatively through the GOT.  */
6206 	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6207 	  new = gen_rtx_CONST (Pmode, new);
6208 	  new = gen_rtx_MEM (Pmode, new);
6209 	  RTX_UNCHANGING_P (new) = 1;
6210 	  set_mem_alias_set (new, ix86_GOT_alias_set ());
6213 	    reg = gen_reg_rtx (Pmode);
6214 	  /* Use directly gen_movsi, otherwise the address is loaded
6215 	     into register for CSE.  We don't want to CSE this addresses,
6216 	     instead we CSE addresses from the GOT table, so skip this.  */
6217 	  emit_insn (gen_movsi (reg, new));
6222 	  /* This symbol must be referenced via a load from the
6223 	     Global Offset Table (@GOT).  */
6225 	  if (reload_in_progress)
6226 	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6227 	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6228 	  new = gen_rtx_CONST (Pmode, new);
6229 	  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6230 	  new = gen_rtx_MEM (Pmode, new);
6231 	  RTX_UNCHANGING_P (new) = 1;
6232 	  set_mem_alias_set (new, ix86_GOT_alias_set ());
6235 	    reg = gen_reg_rtx (Pmode);
6236 	  emit_move_insn (reg, new);
6242       if (GET_CODE (addr) == CONST)
6244 	  addr = XEXP (addr, 0);
6246 	  /* We must match stuff we generate before.  Assume the only
6247 	     unspecs that can get here are ours.  Not that we could do
6248 	     anything with them anyway...  */
6249 	  if (GET_CODE (addr) == UNSPEC
6250 	      || (GET_CODE (addr) == PLUS
6251 		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6253 	  if (GET_CODE (addr) != PLUS)
6256       if (GET_CODE (addr) == PLUS)
6258 	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6260 	  /* Check first to see if this is a constant offset from a @GOTOFF
6261 	     symbol reference.  */
6262 	  if (local_symbolic_operand (op0, Pmode)
6263 	      && GET_CODE (op1) == CONST_INT)
6267 		  if (reload_in_progress)
6268 		    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6269 		  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6271 		  new = gen_rtx_PLUS (Pmode, new, op1);
6272 		  new = gen_rtx_CONST (Pmode, new);
6273 		  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6277 		      emit_move_insn (reg, new);
/* Offsets beyond +/-16MB cannot be encoded here; force the constant into a
   register instead.  */
6283 		  if (INTVAL (op1) < -16*1024*1024
6284 		      || INTVAL (op1) >= 16*1024*1024)
6285 		    new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
/* General PLUS: legitimize both halves recursively and recombine, folding
   constant parts outward so the result stays canonical.  */
6290 	      base = legitimize_pic_address (XEXP (addr, 0), reg);
6291 	      new  = legitimize_pic_address (XEXP (addr, 1),
6292 					     base == reg ? NULL_RTX : reg);
6294 	      if (GET_CODE (new) == CONST_INT)
6295 		new = plus_constant (base, INTVAL (new));
6298 		  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6300 		      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6301 		      new = XEXP (new, 1);
6303 		  new = gen_rtx_PLUS (Pmode, base, new);
6311 /* Load the thread pointer. If TO_REG is true, force it into a register. */
/* Build an rtx for the thread pointer (an UNSPEC_TP).  If TO_REG is
   nonzero, emit a SET copying it into a fresh pseudo and return the
   register (the non-register return path is elided from this extract).  */
6314 get_thread_pointer (int to_reg)
6318   tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6322   reg = gen_reg_rtx (Pmode);
6323   insn = gen_rtx_SET (VOIDmode, reg, tp);
6324   insn = emit_insn (insn);
6329 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6330 false if we expect this to be used for a memory address and true if
6331 we expect to load the address into a register. */
/* Legitimize TLS symbol X according to MODEL (global-dynamic,
   local-dynamic, initial-exec or local-exec), emitting whatever insns the
   model requires and returning the resulting address rtx.  FOR_MOV is
   true when the address will be loaded into a register rather than used
   directly as a memory address.
   NOTE(review): intermediate lines (case bodies' braces, some returns)
   are missing from this extract.  */
6334 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6336   rtx dest, base, off, pic;
6341     case TLS_MODEL_GLOBAL_DYNAMIC:
6342       dest = gen_reg_rtx (Pmode);
/* 64-bit GD: call __tls_get_addr via the tls_global_dynamic_64 pattern;
   the result arrives in rax (hard reg 0).  */
6345 	  rtx rax = gen_rtx_REG (Pmode, 0), insns;
6348 	  emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6349 	  insns = get_insns ();
6352 	  emit_libcall_block (insns, dest, rax, x);
6355 	emit_insn (gen_tls_global_dynamic_32 (dest, x));
6358     case TLS_MODEL_LOCAL_DYNAMIC:
6359       base = gen_reg_rtx (Pmode);
6362 	  rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6365 	  emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6366 	  insns = get_insns ();
/* Attach an equivalence note so CSE can share the module base across
   several LD accesses.  */
6369 	  note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6370 	  note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6371 	  emit_libcall_block (insns, base, rax, note);
6374 	emit_insn (gen_tls_local_dynamic_base_32 (base));
6376       off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6377       off = gen_rtx_CONST (Pmode, off);
6379 	return gen_rtx_PLUS (Pmode, base, off);
6381     case TLS_MODEL_INITIAL_EXEC:
/* Pick the relocation kind for the GOT entry holding the TP offset,
   depending on 64-bit-ness, PIC, and GNU vs. Sun TLS dialect.  */
6385 	  type = UNSPEC_GOTNTPOFF;
6389 	  if (reload_in_progress)
6390 	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6391 	  pic = pic_offset_table_rtx;
6392 	  type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6394       else if (!TARGET_GNU_TLS)
6396 	  pic = gen_reg_rtx (Pmode);
6397 	  emit_insn (gen_set_got (pic));
6398 	  type = UNSPEC_GOTTPOFF;
6403 	  type = UNSPEC_INDNTPOFF;
6406       off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6407       off = gen_rtx_CONST (Pmode, off);
6409 	off = gen_rtx_PLUS (Pmode, pic, off);
6410       off = gen_rtx_MEM (Pmode, off);
6411       RTX_UNCHANGING_P (off) = 1;
6412       set_mem_alias_set (off, ix86_GOT_alias_set ());
6414       if (TARGET_64BIT || TARGET_GNU_TLS)
6416 	  base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6417 	  off = force_reg (Pmode, off);
6418 	  return gen_rtx_PLUS (Pmode, base, off);
/* Sun TLS dialect: the offset is subtracted from the thread pointer.  */
6422 	  base = get_thread_pointer (true);
6423 	  dest = gen_reg_rtx (Pmode);
6424 	  emit_insn (gen_subsi3 (dest, base, off));
6428     case TLS_MODEL_LOCAL_EXEC:
6429       off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6430 			    (TARGET_64BIT || TARGET_GNU_TLS)
6431 			    ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6432       off = gen_rtx_CONST (Pmode, off);
6434       if (TARGET_64BIT || TARGET_GNU_TLS)
6436 	  base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6437 	  return gen_rtx_PLUS (Pmode, base, off);
6441 	  base = get_thread_pointer (true);
6442 	  dest = gen_reg_rtx (Pmode);
6443 	  emit_insn (gen_subsi3 (dest, base, off));
6454 /* Try machine-dependent ways of modifying an illegitimate address
6455 to be legitimate. If we find one, return the new, valid address.
6456 This macro is used in only one place: `memory_address' in explow.c.
6458 OLDX is the address as it was before break_out_memory_refs was called.
6459 In some cases it is useful to look at this to decide what needs to be done.
6461 MODE and WIN are passed so that this macro can use
6462 GO_IF_LEGITIMATE_ADDRESS.
6464 It is always safe for this macro to do nothing. It exists to recognize
6465 opportunities to optimize the output.
6467 For the 80386, we handle X+REG by loading X into a register R and
6468 using R+REG. R will go in a general reg and indexing will be used.
6469 However, if REG is a broken-out memory address or multiplication,
6470 nothing needs to be done because REG can certainly go in a general reg.
6472 When -fpic is used, special handling is needed for symbolic references.
6473 See comments by legitimize_pic_address in i386.c for details. */
/* LEGITIMIZE_ADDRESS worker: try machine-dependent rewrites of X into a
   form GO_IF_LEGITIMATE_ADDRESS accepts (see the comment block above).
   TLS and PIC symbols are dispatched first; then shift/plus forms are
   canonicalized.  NOTE(review): intermediate lines are missing from this
   extract.  */
6476 legitimize_address (register rtx x, register rtx oldx ATTRIBUTE_UNUSED,
6477 		    enum machine_mode mode)
6482   if (TARGET_DEBUG_ADDR)
6484       fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6485 	       GET_MODE_NAME (mode));
/* TLS symbols need their model-specific sequence.  */
6489   log = tls_symbolic_operand (x, mode);
6491     return legitimize_tls_address (x, log, false);
6493   if (flag_pic && SYMBOLIC_CONST (x))
6494     return legitimize_pic_address (x, 0);
6496   /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6497   if (GET_CODE (x) == ASHIFT
6498       && GET_CODE (XEXP (x, 1)) == CONST_INT
6499       && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6502       x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6503 			GEN_INT (1 << log));
6506   if (GET_CODE (x) == PLUS)
6508       /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
6510       if (GET_CODE (XEXP (x, 0)) == ASHIFT
6511 	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6512 	  && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6515 	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
6516 				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6517 				      GEN_INT (1 << log));
6520       if (GET_CODE (XEXP (x, 1)) == ASHIFT
6521 	  && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6522 	  && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6525 	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
6526 				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6527 				      GEN_INT (1 << log));
6530       /* Put multiply first if it isn't already.  */
6531       if (GET_CODE (XEXP (x, 1)) == MULT)
6533 	  rtx tmp = XEXP (x, 0);
6534 	  XEXP (x, 0) = XEXP (x, 1);
6539       /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6540 	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
6541 	 created by virtual register instantiation, register elimination, and
6542 	 similar optimizations.  */
6543       if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6546 	  x = gen_rtx_PLUS (Pmode,
6547 			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
6548 					  XEXP (XEXP (x, 1), 0)),
6549 			    XEXP (XEXP (x, 1), 1));
6553 	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6554 	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
6555       else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6556 	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6557 	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6558 	       && CONSTANT_P (XEXP (x, 1)))
6561 	  rtx other = NULL_RTX;
/* Find which of the two constants is the CONST_INT to fold; the other
   operand is kept symbolic.  */
6563 	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6565 	      constant = XEXP (x, 1);
6566 	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6568 	  else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6570 	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6571 	      other = XEXP (x, 1);
6579 	      x = gen_rtx_PLUS (Pmode,
6580 				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6581 					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
6582 				plus_constant (other, INTVAL (constant)));
/* After each canonicalization round, stop as soon as X is legitimate.  */
6586       if (changed && legitimate_address_p (mode, x, FALSE))
6589       if (GET_CODE (XEXP (x, 0)) == MULT)
6592 	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6595       if (GET_CODE (XEXP (x, 1)) == MULT)
6598 	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6602 	  && GET_CODE (XEXP (x, 1)) == REG
6603 	  && GET_CODE (XEXP (x, 0)) == REG)
6606       if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6609 	  x = legitimize_pic_address (x, 0);
6612       if (changed && legitimate_address_p (mode, x, FALSE))
/* Last resort: force one operand into a register so the sum becomes a
   simple base+reg address.  */
6615       if (GET_CODE (XEXP (x, 0)) == REG)
6617 	  register rtx temp = gen_reg_rtx (Pmode);
6618 	  register rtx val  = force_operand (XEXP (x, 1), temp);
6620 	    emit_move_insn (temp, val);
6626       else if (GET_CODE (XEXP (x, 1)) == REG)
6628 	  register rtx temp = gen_reg_rtx (Pmode);
6629 	  register rtx val  = force_operand (XEXP (x, 0), temp);
6631 	    emit_move_insn (temp, val);
6641 /* Print an integer constant expression in assembler syntax. Addition
6642 and subtraction are the only arithmetic that may appear in these
6643 expressions. FILE is the stdio stream to write to, X is the rtx, and
6644 CODE is the operand print code from the output string. */
/* Print constant expression X to FILE in assembler syntax, emitting the
   PIC/TLS relocation suffixes (@GOT, @GOTOFF, @PLT, @TPOFF, ...) that
   plain output_addr_const cannot.  CODE is the operand print code.
   NOTE(review): case labels and braces are elided in this extract.  */
6647 output_pic_addr_const (FILE *file, rtx x, int code)
6651   switch (GET_CODE (x))
6661       assemble_name (file, XSTR (x, 0));
/* Global symbols printed with code 'P' get a @PLT suffix (ELF only).  */
6662       if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
6663 	fputs ("@PLT", file);
6670       ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6671       assemble_name (asm_out_file, buf);
6675       fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6679       /* This used to output parentheses around the expression,
6680 	 but that does not work on the 386 (either ATT or BSD assembler).  */
6681       output_pic_addr_const (file, XEXP (x, 0), code);
6685       if (GET_MODE (x) == VOIDmode)
6687 	  /* We can use %d if the number is <32 bits and positive.  */
6688 	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6689 	    fprintf (file, "0x%lx%08lx",
6690 		     (unsigned long) CONST_DOUBLE_HIGH (x),
6691 		     (unsigned long) CONST_DOUBLE_LOW (x));
6693 	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6696 	/* We can't handle floating point constants;
6697 	   PRINT_OPERAND must handle them.  */
6698 	output_operand_lossage ("floating constant misused");
6702       /* Some assemblers need integer constants to appear first.  */
6703       if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6705 	  output_pic_addr_const (file, XEXP (x, 0), code);
6707 	  output_pic_addr_const (file, XEXP (x, 1), code);
6709       else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6711 	  output_pic_addr_const (file, XEXP (x, 1), code);
6713 	  output_pic_addr_const (file, XEXP (x, 0), code);
/* MINUS is bracketed; Intel dialect uses parentheses, AT&T brackets.  */
6721       putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6722       output_pic_addr_const (file, XEXP (x, 0), code);
6724       output_pic_addr_const (file, XEXP (x, 1), code);
6726       putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
/* UNSPEC: print the wrapped symbol followed by its relocation suffix.  */
6730       if (XVECLEN (x, 0) != 1)
6732       output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6733       switch (XINT (x, 1))
6736 	  fputs ("@GOT", file);
6739 	  fputs ("@GOTOFF", file);
6741 	case UNSPEC_GOTPCREL:
6742 	  fputs ("@GOTPCREL(%rip)", file);
6744 	case UNSPEC_GOTTPOFF:
6745 	  /* FIXME: This might be @TPOFF in Sun ld too.  */
6746 	  fputs ("@GOTTPOFF", file);
6749 	  fputs ("@TPOFF", file);
6753 	    fputs ("@TPOFF", file);
6755 	    fputs ("@NTPOFF", file);
6758 	  fputs ("@DTPOFF", file);
6760 	case UNSPEC_GOTNTPOFF:
6762 	    fputs ("@GOTTPOFF(%rip)", file);
6764 	    fputs ("@GOTNTPOFF", file);
6766 	case UNSPEC_INDNTPOFF:
6767 	  fputs ("@INDNTPOFF", file);
6770 	  output_operand_lossage ("invalid UNSPEC as operand");
6776       output_operand_lossage ("invalid expression as operand");
6780 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6781 We need to handle our special PIC relocations. */
/* Emit an address constant for dwarfout (ASM_OUTPUT_DWARF_ADDR_CONST):
   a .quad/.long directive, routed through output_pic_addr_const when PIC
   relocations may appear (the guarding condition is elided here).  */
6784 i386_dwarf_output_addr_const (FILE *file, rtx x)
6787   fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6791   fprintf (file, "%s", ASM_LONG);
6794     output_pic_addr_const (file, x, '\0');
6796     output_addr_const (file, x);
6800 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6801 We need to emit DTP-relative relocations. */
/* Emit a DTP-relative relocation for dwarf2out (ASM_OUTPUT_DWARF_DTPREL):
   a .long with an @DTPOFF suffix; the trailing ", 0" pads the value
   (presumably for size == 8 — the size check is elided here).  */
6804 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
6806   fputs (ASM_LONG, file);
6807   output_addr_const (file, x);
6808   fputs ("@DTPOFF", file);
6814       fputs (", 0", file);
6821 /* In the name of slightly smaller debug output, and to cater to
6822 general assembler losage, recognize PIC+GOTOFF and turn it back
6823 into a direct symbol reference. */
/* Undo PIC legitimization for debug output: recognize pic_reg + @GOT/
   @GOTOFF (and 64-bit @GOTPCREL) forms and return the underlying symbol,
   plus any register/offset parts.  NOTE(review): early unwrapping of
   ORIG_X into X is elided from this extract.  */
6826 ix86_delegitimize_address (rtx orig_x)
6830   if (GET_CODE (x) == MEM)
/* 64-bit: a MEM of (const (unspec GOTPCREL)) is a GOT load; strip it.  */
6835       if (GET_CODE (x) != CONST
6836 	  || GET_CODE (XEXP (x, 0)) != UNSPEC
6837 	  || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6838 	  || GET_CODE (orig_x) != MEM)
6840       return XVECEXP (XEXP (x, 0), 0, 0);
6843   if (GET_CODE (x) != PLUS
6844       || GET_CODE (XEXP (x, 1)) != CONST)
6847   if (GET_CODE (XEXP (x, 0)) == REG
6848       && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6849     /* %ebx + GOT/GOTOFF */
6851   else if (GET_CODE (XEXP (x, 0)) == PLUS)
6853       /* %ebx + %reg * scale + GOT/GOTOFF */
/* Y keeps the non-PIC-register half of the inner sum.  */
6855       if (GET_CODE (XEXP (y, 0)) == REG
6856 	  && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6858       else if (GET_CODE (XEXP (y, 1)) == REG
6859 	       && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6863       if (GET_CODE (y) != REG
6864 	  && GET_CODE (y) != MULT
6865 	  && GET_CODE (y) != ASHIFT)
6871   x = XEXP (XEXP (x, 1), 0);
/* @GOT must come from a MEM (a GOT load); @GOTOFF must not.  */
6872   if (GET_CODE (x) == UNSPEC
6873       && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6874 	  || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6877 	return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6878       return XVECEXP (x, 0, 0);
/* Same, but with an additional integer offset inside the CONST.  */
6881   if (GET_CODE (x) == PLUS
6882       && GET_CODE (XEXP (x, 0)) == UNSPEC
6883       && GET_CODE (XEXP (x, 1)) == CONST_INT
6884       && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6885 	  || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6886 	      && GET_CODE (orig_x) != MEM)))
6888       x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6890 	return gen_rtx_PLUS (Pmode, y, x);
/* Write to FILE the condition-code suffix ("e", "a", "np", ...) for rtx
   comparison CODE under CC mode MODE; REVERSE inverts the condition.
   NOTE(review): the case labels of the main switch are elided from this
   extract — only selected suffix assignments are visible.  */
6898 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
6903   if (mode == CCFPmode || mode == CCFPUmode)
6905       enum rtx_code second_code, bypass_code;
6906       ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
/* FP comparisons requiring more than one branch cannot be expressed as a
   single suffix.  */
6907       if (bypass_code != NIL || second_code != NIL)
6909       code = ix86_fp_compare_code_to_integer (code);
6913     code = reverse_condition (code);
6924       if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6929       /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
6930 	 Those same assemblers have the same but opposite losage on cmov.  */
6933       suffix = fp ? "nbe" : "a";
6936       if (mode == CCNOmode || mode == CCGOCmode)
6938       else if (mode == CCmode || mode == CCGCmode)
6949       if (mode == CCNOmode || mode == CCGOCmode)
6951       else if (mode == CCmode || mode == CCGCmode)
6960       suffix = fp ? "nb" : "ae";
6963       if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6973       suffix = fp ? "u" : "p";
6976       suffix = fp ? "nu" : "np";
6981   fputs (suffix, file);
/* Print register X to FILE; CODE selects the size/name variant ('b' byte,
   'w' word, 'k' SImode, 'q' DImode, 'h' high byte, 'y' "st(0)" form).
   NOTE(review): some guard lines and the switch skeleton are elided from
   this extract.  */
6985 print_reg (rtx x, int code, FILE *file)
6987   /* Code -1 indicates we are called from print_rtx, and it is not
6988      an error for a virtual register to appear here.  */
6991   else if (REGNO (x) == ARG_POINTER_REGNUM
6992 	   || REGNO (x) == FRAME_POINTER_REGNUM
6993 	   || REGNO (x) == FLAGS_REG
6994 	   || REGNO (x) == FPSR_REG)
/* AT&T syntax (or empty user-label prefix) prints a '%' prefix here,
   presumably — the put statement itself is elided.  */
6997   if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
/* Translate the print code into an operand byte size.  */
7000   if (code == 'w' || MMX_REG_P (x))
7002   else if (code == 'b')
7004   else if (code == 'k')
7006   else if (code == 'q')
7008   else if (code == 'y')
7010   else if (code == 'h')
7013     code = GET_MODE_SIZE (GET_MODE (x));
7015   /* Irritatingly, AMD extended registers use different naming convention
7016      from the normal registers.  */
7017   if (REX_INT_REG_P (x))
7024 	  error ("extended registers have no high halves");
7027 	  fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7030 	  fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7033 	  fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7036 	  fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7039 	  error ("unsupported operand size for extended register");
7047       if (STACK_TOP_P (x))
7049 	  fputs ("st(0)", file);
/* Integer registers get an 'e' (32-bit) or 'r' (64-bit) name prefix.  */
7056       if (! ANY_FP_REG_P (x))
7057 	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7062       fputs (hi_reg_name[REGNO (x)], file);
7065       if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7067       fputs (qi_reg_name[REGNO (x)], file);
7070       if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7072       fputs (qi_high_reg_name[REGNO (x)], file);
7079 /* Locate some local-dynamic symbol still in use by this function
7080 so that we can print its name in some tls_local_dynamic_base
/* Return (and cache in cfun->machine->some_ld_name) the name of some
   local-dynamic TLS symbol still referenced by this function, by scanning
   all insns with get_some_local_dynamic_name_1.  */
7084 get_some_local_dynamic_name (void)
7088   if (cfun->machine->some_ld_name)
7089     return cfun->machine->some_ld_name;
7091   for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7093 	&& for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7094       return cfun->machine->some_ld_name;
/* for_each_rtx callback: if *PX is a local-dynamic TLS SYMBOL_REF, record
   its name in cfun->machine->some_ld_name (and presumably return nonzero
   to stop the walk — the return is elided here).  */
7100 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7104   if (GET_CODE (x) == SYMBOL_REF
7105       && local_dynamic_symbolic_operand (x, Pmode))
7107       cfun->machine->some_ld_name = XSTR (x, 0);
7115 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7116 C -- print opcode suffix for set/cmov insn.
7117 c -- like C, but print reversed condition
7118 F,f -- likewise, but for floating-point.
7119 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7121 R -- print the prefix for register names.
7122 z -- print the opcode suffix for the size of the current operand.
7123 * -- print a star (in certain assembler syntax)
7124 A -- print an absolute memory reference.
7125 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7126 s -- print a shift double count, followed by the assemblers argument
7128 b -- print the QImode name of the register for the indicated operand.
7129 %b0 would print %al if operands[0] is reg 0.
7130 w -- likewise, print the HImode name of the register.
7131 k -- likewise, print the SImode name of the register.
7132 q -- likewise, print the DImode name of the register.
7133 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7134 y -- print "st(0)" instead of "st" as a register.
7135 D -- print condition for SSE cmp instruction.
7136 P -- if PIC, print an @PLT suffix.
7137 X -- don't print any sort of PIC '@' suffix for a symbol.
7138 & -- print some in-use local-dynamic symbol name.
/* PRINT_OPERAND worker: print operand X to FILE under print code CODE
   (the meanings of the codes are documented in the comment block above).
   NOTE(review): the case labels of the big code switch are elided from
   this extract; only selected case bodies are visible.  */
7142 print_operand (FILE *file, rtx x, int code)
7149 	  if (ASSEMBLER_DIALECT == ASM_ATT)
7154 	  assemble_name (file, get_some_local_dynamic_name ());
7158 	  if (ASSEMBLER_DIALECT == ASM_ATT)
7160 	  else if (ASSEMBLER_DIALECT == ASM_INTEL)
7162 	      /* Intel syntax.  For absolute addresses, registers should not
7163 		 be surrounded by braces.  */
7164 	      if (GET_CODE (x) != REG)
7167 		  PRINT_OPERAND (file, x, 0);
7175 	  PRINT_OPERAND (file, x, 0);
/* Size-suffix codes L/W/B/Q/S/T emit their letter only in AT&T syntax.  */
7180 	  if (ASSEMBLER_DIALECT == ASM_ATT)
7185 	  if (ASSEMBLER_DIALECT == ASM_ATT)
7190 	  if (ASSEMBLER_DIALECT == ASM_ATT)
7195 	  if (ASSEMBLER_DIALECT == ASM_ATT)
7200 	  if (ASSEMBLER_DIALECT == ASM_ATT)
7205 	  if (ASSEMBLER_DIALECT == ASM_ATT)
7210 	  /* 387 opcodes don't get size suffixes if the operands are
7212 	  if (STACK_REG_P (x))
7215 	  /* Likewise if using Intel opcodes.  */
7216 	  if (ASSEMBLER_DIALECT == ASM_INTEL)
7219 	  /* This is the size of op from size of operand.  */
7220 	  switch (GET_MODE_SIZE (GET_MODE (x)))
7223 #ifdef HAVE_GAS_FILDS_FISTS
7229 	      if (GET_MODE (x) == SFmode)
7244 	      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7246 #ifdef GAS_MNEMONICS
7272 	  if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7274 	      PRINT_OPERAND (file, x, 0);
7280 	  /* Little bit of braindamage here.  The SSE compare instructions
7281 	     does use completely different names for the comparisons that the
7282 	     fp conditional moves.  */
7283 	  switch (GET_CODE (x))
7298 	      fputs ("unord", file);
7302 	      fputs ("neq", file);
7306 	      fputs ("nlt", file);
7310 	      fputs ("nle", file);
7313 	      fputs ("ord", file);
/* 'O': Sun assembler wants an explicit size letter on cmov.  */
7321 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7322 	  if (ASSEMBLER_DIALECT == ASM_ATT)
7324 	      switch (GET_MODE (x))
7326 		case HImode: putc ('w', file); break;
7328 		case SFmode: putc ('l', file); break;
7330 		case DFmode: putc ('q', file); break;
7338 	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7341 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7342 	  if (ASSEMBLER_DIALECT == ASM_ATT)
7345 	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7348 	  /* Like above, but reverse condition */
7350 	  /* Check to see if argument to %c is really a constant
7351 	     and not a condition code which needs to be reversed.  */
7352 	  if (GET_RTX_CLASS (GET_CODE (x)) != '<')
7354 	      output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7357 	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7360 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7361 	  if (ASSEMBLER_DIALECT == ASM_ATT)
7364 	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
/* Branch-prediction hint prefixes, driven by REG_BR_PROB notes.  */
7370 	    if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7373 	    x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7376 		int pred_val = INTVAL (XEXP (x, 0));
7378 		if (pred_val < REG_BR_PROB_BASE * 45 / 100
7379 		    || pred_val > REG_BR_PROB_BASE * 55 / 100)
7381 		    int taken = pred_val > REG_BR_PROB_BASE / 2;
7382 		    int cputaken = final_forward_branch_p (current_output_insn) == 0;
7384 		    /* Emit hints only in the case default branch prediction
7385 		       heuristics would fail.  */
7386 		    if (taken != cputaken)
7388 			/* We use 3e (DS) prefix for taken branches and
7389 			   2e (CS) prefix for not taken branches.  */
7391 			  fputs ("ds ; ", file);
7393 			  fputs ("cs ; ", file);
7400 	  output_operand_lossage ("invalid operand code `%c'", code);
/* Fall-through: print the operand itself by its rtx class.  */
7404   if (GET_CODE (x) == REG)
7406       PRINT_REG (x, code, file);
7409   else if (GET_CODE (x) == MEM)
7411       /* No `byte ptr' prefix for call instructions.  */
7412       if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7415 	  switch (GET_MODE_SIZE (GET_MODE (x)))
7417 	    case 1: size = "BYTE"; break;
7418 	    case 2: size = "WORD"; break;
7419 	    case 4: size = "DWORD"; break;
7420 	    case 8: size = "QWORD"; break;
7421 	    case 12: size = "XWORD"; break;
7422 	    case 16: size = "XMMWORD"; break;
7427 	  /* Check for explicit size override (codes 'b', 'w' and 'k') */
7430 	  else if (code == 'w')
7432 	  else if (code == 'k')
7436 	  fputs (" PTR ", file);
7440       /* Avoid (%rip) for call operands.  */
7441       if (CONSTANT_ADDRESS_P (x) && code == 'P'
7442 	  && GET_CODE (x) != CONST_INT)
7443 	output_addr_const (file, x);
7444       else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7445 	output_operand_lossage ("invalid constraints for operand");
7450   else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7455       REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7456       REAL_VALUE_TO_TARGET_SINGLE (r, l);
7458       if (ASSEMBLER_DIALECT == ASM_ATT)
7460       fprintf (file, "0x%08lx", l);
7463   /* These float cases don't actually occur as immediate operands.  */
7464   else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7468       real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7469       fprintf (file, "%s", dstr);
7472   else if (GET_CODE (x) == CONST_DOUBLE
7473 	   && GET_MODE (x) == XFmode)
7477       real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7478       fprintf (file, "%s", dstr);
/* Immediates get '$' in AT&T syntax, "OFFSET FLAT:" in Intel syntax.  */
7485       if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7487 	  if (ASSEMBLER_DIALECT == ASM_ATT)
7490       else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7491 	       || GET_CODE (x) == LABEL_REF)
7493 	  if (ASSEMBLER_DIALECT == ASM_ATT)
7496 	    fputs ("OFFSET FLAT:", file);
7499       if (GET_CODE (x) == CONST_INT)
7500 	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7502 	output_pic_addr_const (file, x, code);
7504 	output_addr_const (file, x);
7508 /* Print a memory operand whose address is ADDR. */
/* Print the memory address ADDR to FILE, after decomposing it into
   base + index*scale + disp (+ optional segment override).  Handles both
   AT&T and Intel dialects.  NOTE(review): braces and some intermediate
   lines are elided from this extract.  */
7511 print_operand_address (FILE *file, register rtx addr)
7513   struct ix86_address parts;
7514   rtx base, index, disp;
7517   if (! ix86_decompose_address (addr, &parts))
7521   index = parts.index;
7523   scale = parts.scale;
/* Emit an fs:/gs: segment override when the decomposition found one.  */
7531 	if (USER_LABEL_PREFIX[0] == 0)
7533       fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
7539   if (!base && !index)
7541       /* Displacement only requires special attention.  */
7543       if (GET_CODE (disp) == CONST_INT)
7545 	  if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
7547 	      if (USER_LABEL_PREFIX[0] == 0)
7549 	      fputs ("ds:", file);
7551 	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
7554 	output_pic_addr_const (file, disp, 0);
7556 	output_addr_const (file, disp);
7558       /* Use one byte shorter RIP relative addressing for 64bit mode.  */
/* TLS symbols are excluded from RIP addressing — they carry their own
   relocation forms.  */
7560 	  && ((GET_CODE (disp) == SYMBOL_REF
7561 	       && ! tls_symbolic_operand (disp, GET_MODE (disp)))
7562 	      || GET_CODE (disp) == LABEL_REF
7563 	      || (GET_CODE (disp) == CONST
7564 		  && GET_CODE (XEXP (disp, 0)) == PLUS
7565 		  && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
7566 		      || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
7567 		  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
7568 	fputs ("(%rip)", file);
/* AT&T dialect: "disp(base,index,scale)".  */
7572       if (ASSEMBLER_DIALECT == ASM_ATT)
7577 		output_pic_addr_const (file, disp, 0);
7578 	      else if (GET_CODE (disp) == LABEL_REF)
7579 		output_asm_label (disp);
7581 		output_addr_const (file, disp);
7586 	    PRINT_REG (base, 0, file);
7590 	      PRINT_REG (index, 0, file);
7592 		fprintf (file, ",%d", scale);
/* Intel dialect: "sym[base+index*scale+offset]".  */
7598 	  rtx offset = NULL_RTX;
7602 	      /* Pull out the offset of a symbol; print any symbol itself.  */
7603 	      if (GET_CODE (disp) == CONST
7604 		  && GET_CODE (XEXP (disp, 0)) == PLUS
7605 		  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7607 		  offset = XEXP (XEXP (disp, 0), 1);
7608 		  disp = gen_rtx_CONST (VOIDmode,
7609 					XEXP (XEXP (disp, 0), 0));
7613 		output_pic_addr_const (file, disp, 0);
7614 	      else if (GET_CODE (disp) == LABEL_REF)
7615 		output_asm_label (disp);
7616 	      else if (GET_CODE (disp) == CONST_INT)
7619 		output_addr_const (file, disp);
7625 	      PRINT_REG (base, 0, file);
7628 		  if (INTVAL (offset) >= 0)
7630 		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7634 	      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7641 	      PRINT_REG (index, 0, file);
7643 		fprintf (file, "*%d", scale);
/* Target hook: print UNSPEC address constants that output_addr_const
   cannot handle itself.  Emits the TLS relocation suffix matching the
   UNSPEC kind (@GOTTPOFF, @TPOFF, @NTPOFF, @DTPOFF, @GOTNTPOFF,
   @INDNTPOFF) after the wrapped symbol.  Non-UNSPEC rtx are rejected.  */
7651 output_addr_const_extra (FILE *file, rtx x)
7655 if (GET_CODE (x) != UNSPEC)
/* Operand 0 of the UNSPEC is the underlying symbol.  */
7658 op = XVECEXP (x, 0, 0);
7659 switch (XINT (x, 1))
7661 case UNSPEC_GOTTPOFF:
7662 output_addr_const (file, op);
7663 /* FIXME: This might be @TPOFF in Sun ld.  */
7664 fputs ("@GOTTPOFF", file);
7667 output_addr_const (file, op);
7668 fputs ("@TPOFF", file);
7671 output_addr_const (file, op);
7673 fputs ("@TPOFF", file);
7675 fputs ("@NTPOFF", file);
7678 output_addr_const (file, op);
7679 fputs ("@DTPOFF", file);
7681 case UNSPEC_GOTNTPOFF:
7682 output_addr_const (file, op);
/* 64-bit form uses a %rip-relative @GOTTPOFF reference.  */
7684 fputs ("@GOTTPOFF(%rip)", file);
7686 fputs ("@GOTNTPOFF", file);
7688 case UNSPEC_INDNTPOFF:
7689 output_addr_const (file, op);
7690 fputs ("@INDNTPOFF", file);
7700 /* Split one or more DImode RTL references into pairs of SImode
7701 references.  The RTL can be REG, offsettable MEM, integer constant, or
7702 CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
7703 split and "num" is its length.  lo_half and hi_half are output arrays
7704 that parallel "operands".  */
7707 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7711 rtx op = operands[num];
7713 /* simplify_subreg refuse to split volatile memory addresses,
7714 but we still have to handle it.  */
7715 if (GET_CODE (op) == MEM)
/* MEM: low word at offset 0, high word at offset 4.  */
7717 lo_half[num] = adjust_address (op, SImode, 0);
7718 hi_half[num] = adjust_address (op, SImode, 4);
/* Non-MEM: take SImode subregs; VOIDmode constants are treated
   as DImode so the subreg machinery knows the source width.  */
7722 lo_half[num] = simplify_gen_subreg (SImode, op,
7723 GET_MODE (op) == VOIDmode
7724 ? DImode : GET_MODE (op), 0);
7725 hi_half[num] = simplify_gen_subreg (SImode, op,
7726 GET_MODE (op) == VOIDmode
7727 ? DImode : GET_MODE (op), 4);
7731 /* Split one or more TImode RTL references into pairs of DImode
7732 references.  The RTL can be REG, offsettable MEM, integer constant, or
7733 CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
7734 split and "num" is its length.  lo_half and hi_half are output arrays
7735 that parallel "operands".  */
7738 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7742 rtx op = operands[num];
7744 /* simplify_subreg refuse to split volatile memory addresses, but we
7745 still have to handle it.  */
7746 if (GET_CODE (op) == MEM)
/* MEM: low DImode word at offset 0, high word at offset 8.  */
7748 lo_half[num] = adjust_address (op, DImode, 0);
7749 hi_half[num] = adjust_address (op, DImode, 8);
7753 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7754 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7759 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7760 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7761 is the expression of the binary operation. The output may either be
7762 emitted here, or returned to the caller, like all output_* functions.
7764 There is no guarantee that the operands are the same mode, as they
7765 might be within FLOAT or FLOAT_EXTEND expressions. */
7767 #ifndef SYSV386_COMPAT
7768 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7769 wants to fix the assemblers because that causes incompatibility
7770 with gcc. No-one wants to fix gcc because that causes
7771 incompatibility with assemblers... You can use the option of
7772 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7773 #define SYSV386_COMPAT 1
/* Emit the assembler template for a 387 (or SSE scalar) binary
   operation in INSN: PLUS, MINUS, MULT or DIV, found in operands[3].
   Picks the register-stack variant (plain, popping "p", reversed "r",
   or "rp") based on which operand is st(0), which operands die, and
   whether a memory operand is involved.  The SYSV386_COMPAT dialect
   quirks for fsub{r}/fdiv{r} are handled in the {...|...} templates.
   NOTE(review): intervening lines are elided; the switch cases and
   buffer setup between the visible statements are not fully shown.  */
7777 output_387_binary_op (rtx insn, rtx *operands)
7779 static char buf[30];
7782 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7784 #ifdef ENABLE_CHECKING
7785 /* Even if we do not want to check the inputs, this documents input
7786 constraints.  Which helps in understanding the following code.  */
7787 if (STACK_REG_P (operands[0])
7788 && ((REG_P (operands[1])
7789 && REGNO (operands[0]) == REGNO (operands[1])
7790 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7791 || (REG_P (operands[2])
7792 && REGNO (operands[0]) == REGNO (operands[2])
7793 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7794 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
/* First switch: reject integer-mode operands for the FP templates.  */
7800 switch (GET_CODE (operands[3]))
7803 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7804 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7812 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7813 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7821 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7822 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7830 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7831 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
/* SSE path: scalar single vs. scalar double suffix.  */
7845 if (GET_MODE (operands[0]) == SFmode)
7846 strcat (buf, "ss\t{%2, %0|%0, %2}");
7848 strcat (buf, "sd\t{%2, %0|%0, %2}");
/* 387 path: choose template by operator and stack position.  */
7853 switch (GET_CODE (operands[3]))
/* Commutative ops: canonicalize so operands[0] == operands[1].  */
7857 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7859 rtx temp = operands[2];
7860 operands[2] = operands[1];
7864 /* know operands[0] == operands[1].  */
7866 if (GET_CODE (operands[2]) == MEM)
7872 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7874 if (STACK_TOP_P (operands[0]))
7875 /* How is it that we are storing to a dead operand[2]?
7876 Well, presumably operands[1] is dead too.  We can't
7877 store the result to st(0) as st(0) gets popped on this
7878 instruction.  Instead store to operands[2] (which I
7879 think has to be st(1)).  st(1) will be popped later.
7880 gcc <= 2.8.1 didn't have this check and generated
7881 assembly code that the Unixware assembler rejected.  */
7882 p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
7884 p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
7888 if (STACK_TOP_P (operands[0]))
7889 p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
7891 p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
/* Non-commutative ops (MINUS/DIV): memory operand cases first.  */
7896 if (GET_CODE (operands[1]) == MEM)
7902 if (GET_CODE (operands[2]) == MEM)
7908 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7911 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7912 derived assemblers, confusingly reverse the direction of
7913 the operation for fsub{r} and fdiv{r} when the
7914 destination register is not st(0).  The Intel assembler
7915 doesn't have this brain damage.  Read !SYSV386_COMPAT to
7916 figure out what the hardware really does.  */
7917 if (STACK_TOP_P (operands[0]))
7918 p = "{p\t%0, %2|rp\t%2, %0}";
7920 p = "{rp\t%2, %0|p\t%0, %2}";
7922 if (STACK_TOP_P (operands[0]))
7923 /* As above for fmul/fadd, we can't store to st(0).  */
7924 p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
7926 p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
7931 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7934 if (STACK_TOP_P (operands[0]))
7935 p = "{rp\t%0, %1|p\t%1, %0}";
7937 p = "{p\t%1, %0|rp\t%0, %1}";
7939 if (STACK_TOP_P (operands[0]))
7940 p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
7942 p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
/* Neither operand dies: plain (non-popping) variants.  */
7947 if (STACK_TOP_P (operands[0]))
7949 if (STACK_TOP_P (operands[1]))
7950 p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
7952 p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
7955 else if (STACK_TOP_P (operands[1]))
7958 p = "{\t%1, %0|r\t%0, %1}";
7960 p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
7966 p = "{r\t%2, %0|\t%0, %2}";
7968 p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
7981 /* Output code to initialize control word copies used by
7982 trunc?f?i patterns.  NORMAL is set to current control word, while ROUND_DOWN
7983 is set to control word rounding downwards.  */
7985 emit_i387_cw_initialization (rtx normal, rtx round_down)
7987 rtx reg = gen_reg_rtx (HImode);
/* Capture the current control word into NORMAL, then copy it so we
   can modify the rounding-control bits.  */
7989 emit_insn (gen_x86_fnstcw_1 (normal));
7990 emit_move_insn (reg, normal);
/* Fast path avoids the HImode OR (partial-register stall); the insv
   form sets the RC field (0xc) directly.  */
7991 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7993 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
/* 0xc00 sets both x87 rounding-control bits — NOTE(review): RC=11 is
   round-toward-zero (truncation), despite the "downwards" wording
   above; confirm against the trunc?f?i users.  */
7995 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7996 emit_move_insn (round_down, reg);
7999 /* Output code for INSN to convert a float to a signed int.  OPERANDS
8000 are the insn operands.  The output may be [HSD]Imode and the input
8001 operand may be [SDX]Fmode.  */
8004 output_fix_trunc (rtx insn, rtx *operands)
8006 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8007 int dimode_p = GET_MODE (operands[0]) == DImode;
8009 /* Jump through a hoop or two for DImode, since the hardware has no
8010 non-popping instruction.  We used to do this a different way, but
8011 that was somewhat fragile and broke with post-reload splitters.  */
/* Duplicate the value so the mandatory pop (fistp) leaves the stack
   as callers expect when st(0) does not die.  */
8012 if (dimode_p && !stack_top_dies)
8013 output_asm_insn ("fld\t%y1", operands);
/* Input must already be on top of the 387 stack.  */
8015 if (!STACK_TOP_P (operands[1]))
8018 if (GET_CODE (operands[0]) != MEM)
/* Switch to the truncating control word (%3), convert, restore (%2).  */
8021 output_asm_insn ("fldcw\t%3", operands);
8022 if (stack_top_dies || dimode_p)
8023 output_asm_insn ("fistp%z0\t%0", operands);
8025 output_asm_insn ("fist%z0\t%0", operands);
8026 output_asm_insn ("fldcw\t%2", operands);
8031 /* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
8032 should be used and 2 when fnstsw should be used.  UNORDERED_P is true
8033 when fucom should be used.  */
8036 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8039 rtx cmp_op0 = operands[0];
8040 rtx cmp_op1 = operands[1];
8041 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
8046 cmp_op1 = operands[2];
/* SSE operands: [u]comiss / [u]comisd set EFLAGS directly.  */
8050 if (GET_MODE (operands[0]) == SFmode)
8052 return "ucomiss\t{%1, %0|%0, %1}";
8054 return "comiss\t{%1, %0|%0, %1}";
8057 return "ucomisd\t{%1, %0|%0, %1}";
8059 return "comisd\t{%1, %0|%0, %1}";
/* 387 path: first operand must be st(0).  */
8062 if (! STACK_TOP_P (cmp_op0))
8065 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8067 if (STACK_REG_P (cmp_op1)
8069 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8070 && REGNO (cmp_op1) != FIRST_STACK_REG)
8072 /* If both the top of the 387 stack dies, and the other operand
8073 is also a stack register that dies, then this must be a
8074 `fcompp' float compare */
8078 /* There is no double popping fcomi variant.  Fortunately,
8079 eflags is immune from the fstp's cc clobbering.  */
8081 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8083 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8091 return "fucompp\n\tfnstsw\t%0";
8093 return "fcompp\n\tfnstsw\t%0";
8106 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */
/* Template table indexed by the 4-bit mask built below.  */
8108 static const char * const alt[24] =
8120 "fcomi\t{%y1, %0|%0, %y1}",
8121 "fcomip\t{%y1, %0|%0, %y1}",
8122 "fucomi\t{%y1, %0|%0, %y1}",
8123 "fucomip\t{%y1, %0|%0, %y1}",
8130 "fcom%z2\t%y2\n\tfnstsw\t%0",
8131 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8132 "fucom%z2\t%y2\n\tfnstsw\t%0",
8133 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8135 "ficom%z2\t%y2\n\tfnstsw\t%0",
8136 "ficomp%z2\t%y2\n\tfnstsw\t%0",
/* Build the table index from the four selector bits.  */
8144 mask  = eflags_p << 3;
8145 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
8146 mask |= unordered_p << 1;
8147 mask |= stack_top_dies;
/* Emit one absolute jump-table entry for local label VALUE, as a
   .long (or .quad, in the branch selecting ASM_QUAD) directive.  */
8160 ix86_output_addr_vec_elt (FILE *file, int value)
8162 const char *directive = ASM_LONG;
8167 directive = ASM_QUAD;
8173 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
/* Emit one relative jump-table entry: label VALUE expressed relative
   to label REL, as @GOTOFF, relative to the Mach-O function base, or
   relative to the GOT symbol, depending on target/assembler support.  */
8177 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8180 fprintf (file, "%s%s%d-%s%d\n",
8181 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8182 else if (HAVE_AS_GOTOFF_IN_DATA)
8183 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8185 else if (TARGET_MACHO)
8187 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8188 machopic_output_function_base_name (file);
8189 fprintf(file, "\n");
/* Fallback: express the entry relative to the GOT symbol.  */
8193 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8194 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8197 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
/* for zeroing DEST.  The xor form clobbers flags, so it is only used
   post-reload where the flags-clobbering PARALLEL is valid.  */
8201 ix86_expand_clear (rtx dest)
8205 /* We play register width games, which are only valid after reload.  */
8206 if (!reload_completed)
8209 /* Avoid HImode and its attendant prefix byte.  */
8210 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8211 dest = gen_rtx_REG (SImode, REGNO (dest));
8213 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8215 /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
8216 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
/* xor clobbers the flags register (hard reg 17), so wrap in PARALLEL.  */
8218 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8219 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8225 /* X is an unchanging MEM.  If it is a constant pool reference, return
8226 the constant pool rtx, else NULL.  */
8229 maybe_get_pool_constant (rtx x)
/* Undo any PIC/GOT wrapping around the address first.  */
8231 x = ix86_delegitimize_address (XEXP (x, 0));
8233 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8234 return get_pool_constant (x);
/* Expand a move of MODE between operands[0] (dest) and operands[1]
   (source), legitimizing TLS and PIC symbolic addresses, forcing
   mem-to-mem moves through a register, and spilling FP constants to
   the constant pool.  Emits the final SET insn itself.
   NOTE(review): intervening lines are elided; some conditions and the
   Mach-O #if structure are not fully visible.  */
8240 ix86_expand_move (enum machine_mode mode, rtx operands[])
8242 int strict = (reload_in_progress || reload_completed);
8244 enum tls_model model;
/* TLS symbols need a model-specific legitimized address sequence.  */
8249 model = tls_symbolic_operand (op1, Pmode);
8252 op1 = legitimize_tls_address (op1, model, true);
8253 op1 = force_operand (op1, op0);
8258 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
/* Darwin (Mach-O) PIC handling.  */
8263 rtx temp = ((reload_in_progress
8264 || ((op0 && GET_CODE (op0) == REG)
8266 ? op0 : gen_reg_rtx (Pmode));
8267 op1 = machopic_indirect_data_reference (op1, temp);
8268 op1 = machopic_legitimize_pic_address (op1, mode,
8269 temp == op1 ? 0 : temp);
8271 else if (MACHOPIC_INDIRECT)
8272 op1 = machopic_indirect_data_reference (op1, 0);
8276 if (GET_CODE (op0) == MEM)
8277 op1 = force_reg (Pmode, op1);
/* ELF PIC handling.  */
8281 if (GET_CODE (temp) != REG)
8282 temp = gen_reg_rtx (Pmode);
8283 temp = legitimize_pic_address (op1, temp);
8288 #endif /* TARGET_MACHO */
/* mem-to-mem moves (other than pushes) need an intermediate reg.  */
8292 if (GET_CODE (op0) == MEM
8293 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8294 || !push_operand (op0, mode))
8295 && GET_CODE (op1) == MEM)
8296 op1 = force_reg (mode, op1);
8298 if (push_operand (op0, mode)
8299 && ! general_no_elim_operand (op1, mode))
8300 op1 = copy_to_mode_reg (mode, op1);
8302 /* Force large constants in 64bit compilation into register
8303 to get them CSEed.  */
8304 if (TARGET_64BIT && mode == DImode
8305 && immediate_operand (op1, mode)
8306 && !x86_64_zero_extended_value (op1)
8307 && !register_operand (op0, mode)
8308 && optimize && !reload_completed && !reload_in_progress)
8309 op1 = copy_to_mode_reg (mode, op1);
8311 if (FLOAT_MODE_P (mode))
8313 /* If we are loading a floating point constant to a register,
8314 force the value to memory now, since we'll get better code
8315 out the back end.  */
8319 else if (GET_CODE (op1) == CONST_DOUBLE)
8321 op1 = validize_mem (force_const_mem (mode, op1));
8322 if (!register_operand (op0, mode))
/* Constant-pool load into a MEM dest: go through a fresh reg.  */
8324 rtx temp = gen_reg_rtx (mode);
8325 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8326 emit_move_insn (op0, temp);
8333 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
/* Expand a vector-mode move: push nonzero constants into the constant
   pool, break mem-to-mem moves through a register, then emit the SET.  */
8337 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
8339 /* Force constants other than zero into memory.  We do not know how
8340 the instructions used to build constants modify the upper 64 bits
8341 of the register, once we have that information we may be able
8342 to handle some of them more efficiently.  */
8343 if ((reload_in_progress | reload_completed) == 0
8344 && register_operand (operands[0], mode)
8345 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
8346 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
8348 /* Make operand1 a register if it isn't already.  */
8350 && !register_operand (operands[0], mode)
8351 && !register_operand (operands[1], mode))
8353 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8354 emit_move_insn (operands[0], temp);
8358 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8361 /* Attempt to expand a binary operator.  Make the expansion closer to the
8362 actual machine, then just general_operand, which will allow 3 separate
8363 memory references (one output, two input) in a single insn.  */
8366 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
8369 int matching_memory;
8370 rtx src1, src2, dst, op, clob;
8376 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
/* Swap sources so the destination-matching operand comes first.  */
8377 if (GET_RTX_CLASS (code) == 'c'
8378 && (rtx_equal_p (dst, src2)
8379 || immediate_operand (src1, mode)))
8386 /* If the destination is memory, and we do not have matching source
8387 operands, do things in registers.  */
8388 matching_memory = 0;
8389 if (GET_CODE (dst) == MEM)
8391 if (rtx_equal_p (dst, src1))
8392 matching_memory = 1;
8393 else if (GET_RTX_CLASS (code) == 'c'
8394 && rtx_equal_p (dst, src2))
8395 matching_memory = 2;
8397 dst = gen_reg_rtx (mode);
8400 /* Both source operands cannot be in memory.  */
8401 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
/* Keep whichever source matches the MEM destination in memory.  */
8403 if (matching_memory != 2)
8404 src2 = force_reg (mode, src2);
8406 src1 = force_reg (mode, src1);
8409 /* If the operation is not commutable, source 1 cannot be a constant
8410 or non-matching memory.  */
8411 if ((CONSTANT_P (src1)
8412 || (!matching_memory && GET_CODE (src1) == MEM))
8413 && GET_RTX_CLASS (code) != 'c')
8414 src1 = force_reg (mode, src1);
8416 /* If optimizing, copy to regs to improve CSE */
8417 if (optimize && ! no_new_pseudos)
8419 if (GET_CODE (dst) == MEM)
8420 dst = gen_reg_rtx (mode);
8421 if (GET_CODE (src1) == MEM)
8422 src1 = force_reg (mode, src1);
8423 if (GET_CODE (src2) == MEM)
8424 src2 = force_reg (mode, src2);
8427 /* Emit the instruction.  */
8429 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8430 if (reload_in_progress)
8432 /* Reload doesn't know about the flags register, and doesn't know that
8433 it doesn't want to clobber it.  We can only do this with PLUS.  */
/* Otherwise wrap the SET with an explicit flags clobber.  */
8440 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8441 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8444 /* Fix up the destination if needed.  */
8445 if (dst != operands[0])
8446 emit_move_insn (operands[0], dst);
8449 /* Return TRUE or FALSE depending on whether the binary operator meets the
8450 appropriate constraints.  */
8453 ix86_binary_operator_ok (enum rtx_code code,
8454 enum machine_mode mode ATTRIBUTE_UNUSED,
8457 /* Both source operands cannot be in memory.  */
8458 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8460 /* If the operation is not commutable, source 1 cannot be a constant.  */
8461 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
8463 /* If the destination is memory, we must have a matching source operand.  */
8464 if (GET_CODE (operands[0]) == MEM
8465 && ! (rtx_equal_p (operands[0], operands[1])
8466 || (GET_RTX_CLASS (code) == 'c'
8467 && rtx_equal_p (operands[0], operands[2]))))
8469 /* If the operation is not commutable and the source 1 is memory, we must
8470 have a matching destination.  */
8471 if (GET_CODE (operands[1]) == MEM
8472 && GET_RTX_CLASS (code) != 'c'
8473 && ! rtx_equal_p (operands[0], operands[1]))
8478 /* Attempt to expand a unary operator.  Make the expansion closer to the
8479 actual machine, then just general_operand, which will allow 2 separate
8480 memory references (one output, one input) in a single insn.  */
8483 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
8486 int matching_memory;
8487 rtx src, dst, op, clob;
8492 /* If the destination is memory, and we do not have matching source
8493 operands, do things in registers.  */
8494 matching_memory = 0;
8495 if (GET_CODE (dst) == MEM)
8497 if (rtx_equal_p (dst, src))
8498 matching_memory = 1;
8500 dst = gen_reg_rtx (mode);
8503 /* When source operand is memory, destination must match.  */
8504 if (!matching_memory && GET_CODE (src) == MEM)
8505 src = force_reg (mode, src);
8507 /* If optimizing, copy to regs to improve CSE */
8508 if (optimize && ! no_new_pseudos)
8510 if (GET_CODE (dst) == MEM)
8511 dst = gen_reg_rtx (mode);
8512 if (GET_CODE (src) == MEM)
8513 src = force_reg (mode, src);
8516 /* Emit the instruction.  */
8518 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8519 if (reload_in_progress || code == NOT)
8521 /* Reload doesn't know about the flags register, and doesn't know that
8522 it doesn't want to clobber it.  */
/* NOT doesn't touch flags, so no clobber is needed for it.  */
8529 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8530 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8533 /* Fix up the destination if needed.  */
8534 if (dst != operands[0])
8535 emit_move_insn (operands[0], dst);
8538 /* Return TRUE or FALSE depending on whether the unary operator meets the
8539 appropriate constraints.  */
8542 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8543 enum machine_mode mode ATTRIBUTE_UNUSED,
8544 rtx operands[2] ATTRIBUTE_UNUSED)
8546 /* If one of operands is memory, source and destination must match.  */
8547 if ((GET_CODE (operands[0]) == MEM
8548 || GET_CODE (operands[1]) == MEM)
8549 && ! rtx_equal_p (operands[0], operands[1]))
8554 /* Return TRUE or FALSE depending on whether the first SET in INSN
8555 has source and destination with matching CC modes, and that the
8556 CC mode is at least as constrained as REQ_MODE.  */
8559 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
8562 enum machine_mode set_mode;
8564 set = PATTERN (insn);
8565 if (GET_CODE (set) == PARALLEL)
8566 set = XVECEXP (set, 0, 0);
8567 if (GET_CODE (set) != SET)
8569 if (GET_CODE (SET_SRC (set)) != COMPARE)
8572 set_mode = GET_MODE (SET_DEST (set));
/* Per-mode acceptance rules: each case lists which REQ_MODEs the
   actual SET_DEST mode satisfies.  */
8576 if (req_mode != CCNOmode
8577 && (req_mode != CCmode
8578 || XEXP (SET_SRC (set), 1) != const0_rtx))
8582 if (req_mode == CCGCmode)
8586 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8590 if (req_mode == CCZmode)
8600 return (GET_MODE (SET_SRC (set)) == set_mode);
8603 /* Generate insn patterns to do an integer compare of OPERANDS.  */
8606 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
8608 enum machine_mode cmpmode;
/* Pick the narrowest CC mode that still captures CODE's semantics.  */
8611 cmpmode = SELECT_CC_MODE (code, op0, op1);
8612 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8614 /* This is very simple, but making the interface the same as in the
8615 FP case makes the rest of the code easier.  */
8616 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8617 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8619 /* Return the test that should be put into the flags user, i.e.
8620 the bcc, scc, or cmov instruction.  */
8621 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8624 /* Figure out whether to use ordered or unordered fp comparisons.
8625 Return the appropriate mode to use.  */
8628 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
8630 /* ??? In order to make all comparisons reversible, we do all comparisons
8631 non-trapping when compiling for IEEE.  Once gcc is able to distinguish
8632 all forms trapping and nontrapping comparisons, we can make inequality
8633 comparisons trapping again, since it results in better code when using
8634 FCOM based compares.  */
8635 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* Select the CC mode for a comparison CODE on OP0/OP1: an FP mode for
   float operands, otherwise the weakest integer CC mode (zero-flag
   only, carry-based, sign-based against zero, or full CCmode) whose
   flags suffice for CODE.  */
8639 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
8641 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8642 return ix86_fp_compare_mode (code);
8645 /* Only zero flag is needed.  */
8647 case NE:			/* ZF!=0 */
8649 /* Codes needing carry flag.  */
8650 case GEU:			/* CF=0 */
8651 case GTU:			/* CF=0 & ZF=0 */
8652 case LTU:			/* CF=1 */
8653 case LEU:			/* CF=1 | ZF=1 */
8655 /* Codes possibly doable only with sign flag when
8656 comparing against zero.  */
8657 case GE:			/* SF=OF   or   SF=0 */
8658 case LT:			/* SF<>OF  or   SF=1 */
8659 if (op1 == const0_rtx)
8662 /* For other cases Carry flag is not required.  */
8664 /* Codes doable only with sign flag when comparing
8665 against zero, but we miss jump instruction for it
8666 so we need to use relational tests against overflow
8667 that thus needs to be zero.  */
8668 case GT:			/* ZF=0 & SF=OF */
8669 case LE:			/* ZF=1 | SF<>OF */
8670 if (op1 == const0_rtx)
8674 /* strcmp pattern do (use flags) and combine may ask us for proper
8683 /* Return true if we should use an FCOMI instruction for this fp comparison.  */
/* True when fcomi is as cheap as the best strategy for either the
   comparison or its swapped form (costs from the helpers below).  */
8686 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
8688 enum rtx_code swapped_code = swap_condition (code);
8689 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8690 || (ix86_fp_comparison_cost (swapped_code)
8691 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8694 /* Swap, force into registers, or otherwise massage the two operands
8695 to a fp comparison.  The operands are updated in place; the new
8696 comparison code is returned.  */
8698 static enum rtx_code
8699 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
8701 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8702 rtx op0 = *pop0, op1 = *pop1;
8703 enum machine_mode op_mode = GET_MODE (op0);
8704 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8706 /* All of the unordered compare instructions only work on registers.
8707 The same is true of the XFmode compare instructions.  The same is
8708 true of the fcomi compare instructions.  */
8711 && (fpcmp_mode == CCFPUmode
8712 || op_mode == XFmode
8713 || ix86_use_fcomi_compare (code)))
8715 op0 = force_reg (op_mode, op0);
8716 op1 = force_reg (op_mode, op1);
8720 /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
8721 things around if they appear profitable, otherwise force op0
/* Swap when op0 is a MEM or a non-loadable constant and op1 isn't.  */
8724 if (standard_80387_constant_p (op0) == 0
8725 || (GET_CODE (op0) == MEM
8726 && ! (standard_80387_constant_p (op1) == 0
8727 || GET_CODE (op1) == MEM)))
8730 tmp = op0, op0 = op1, op1 = tmp;
8731 code = swap_condition (code);
8734 if (GET_CODE (op0) != REG)
8735 op0 = force_reg (op_mode, op0);
8737 if (CONSTANT_P (op1))
/* Standard 387 constants (fldz/fld1...) can live in a register;
   others must come from the constant pool.  */
8739 if (standard_80387_constant_p (op1))
8740 op1 = force_reg (op_mode, op1);
8742 op1 = validize_mem (force_const_mem (op_mode, op1));
8746 /* Try to rearrange the comparison to make it cheaper.  */
8747 if (ix86_fp_comparison_cost (code)
8748 > ix86_fp_comparison_cost (swap_condition (code))
8749 && (GET_CODE (op1) == REG || !no_new_pseudos))
8752 tmp = op0, op0 = op1, op1 = tmp;
8753 code = swap_condition (code);
8754 if (GET_CODE (op0) != REG)
8755 op0 = force_reg (op_mode, op0);
8763 /* Convert comparison codes we use to represent FP comparison to integer
8764 code that will result in proper branch.  Return UNKNOWN if no such code
/* NOTE(review): body elided here; only the declaration is visible.  */
8766 static enum rtx_code
8767 ix86_fp_compare_code_to_integer (enum rtx_code code)
8796 /* Split comparison code CODE into comparisons we can do using branch
8797 instructions.  BYPASS_CODE is comparison code for branch that will
8798 branch around FIRST_CODE and SECOND_CODE.  If some of branches
8799 is not required, set value to NIL.
8800 We never require more than two branches.  */
8802 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
8803 enum rtx_code *first_code,
8804 enum rtx_code *second_code)
8810 /* The fcomi comparison sets flags as follows:
/* Single-branch codes: directly expressible from fcomi flags.  */
8820 case GT:			/* GTU - CF=0 & ZF=0 */
8821 case GE:			/* GEU - CF=0 */
8822 case ORDERED:		/* PF=0 */
8823 case UNORDERED:		/* PF=1 */
8824 case UNEQ:			/* EQ - ZF=1 */
8825 case UNLT:			/* LTU - CF=1 */
8826 case UNLE:			/* LEU - CF=1 | ZF=1 */
8827 case LTGT:			/* EQ - ZF=0 */
/* Ordered codes that misbehave on NaN: guard with an UNORDERED
   bypass branch around the main test.  */
8829 case LT:			/* LTU - CF=1 - fails on unordered */
8831 *bypass_code = UNORDERED;
8833 case LE:			/* LEU - CF=1 | ZF=1 - fails on unordered */
8835 *bypass_code = UNORDERED;
8837 case EQ:			/* EQ - ZF=1 - fails on unordered */
8839 *bypass_code = UNORDERED;
/* Unordered-true codes: take the branch also when UNORDERED.  */
8841 case NE:			/* NE - ZF=0 - fails on unordered */
8843 *second_code = UNORDERED;
8845 case UNGE:			/* GEU - CF=0 - fails on unordered */
8847 *second_code = UNORDERED;
8849 case UNGT:			/* GTU - CF=0 & ZF=0 - fails on unordered */
8851 *second_code = UNORDERED;
/* Without IEEE conformance the NaN guards can be dropped.  */
8856 if (!TARGET_IEEE_FP)
8863 /* Return cost of comparison done fcom + arithmetics operations on AX.
8864 All following functions do use number of instructions as a cost metrics.
8865 In future this should be tweaked to compute bytes for optimize_size and
8866 take into account performance of various instructions on various CPUs.  */
8868 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
/* Non-IEEE mode needs no NaN handling, so the sequence is shortest.  */
8870 if (!TARGET_IEEE_FP)
8872 /* The cost of code output by ix86_expand_fp_compare.  */
8900 /* Return cost of comparison done using fcomi operation.
8901 See ix86_fp_comparison_arithmetics_cost for the metrics.  */
8903 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
8905 enum rtx_code bypass_code, first_code, second_code;
8906 /* Return arbitrarily high cost when instruction is not supported - this
8907 prevents gcc from using it.  */
/* Base cost 2 (fcomi + branch) plus 1 per extra branch needed.  */
8910 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8911 return (bypass_code != NIL || second_code != NIL) + 2;
8914 /* Return cost of comparison done using sahf operation.
8915 See ix86_fp_comparison_arithmetics_cost for the metrics.  */
8917 ix86_fp_comparison_sahf_cost (enum rtx_code code)
8919 enum rtx_code bypass_code, first_code, second_code;
8920 /* Return arbitrarily high cost when instruction is not preferred - this
8921 avoids gcc from using it.  */
8922 if (!TARGET_USE_SAHF && !optimize_size)
/* Base cost 3 (fnstsw + sahf + branch) plus 1 per extra branch.  */
8924 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8925 return (bypass_code != NIL || second_code != NIL) + 3;
8928 /* Compute cost of the comparison done using any method.
8929 See ix86_fp_comparison_arithmetics_cost for the metrics.  */
8931 ix86_fp_comparison_cost (enum rtx_code code)
8933 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8936 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8937 sahf_cost = ix86_fp_comparison_sahf_cost (code);
/* Take the minimum of the three strategies.  */
8939 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8940 if (min > sahf_cost)
8942 if (min > fcomi_cost)
8947 /* Generate insn patterns to do a floating point compare of OPERANDS. */
/* NOTE(review): this listing is non-contiguous (the embedded source line
   numbers jump), so several statements of this body are not visible here;
   the comments below describe only what the visible lines establish.
   Emit RTL comparing OP0 and OP1 in FP mode and return the condition rtx
   to be placed in the flags user (bcc/scc/cmov).  When a single flags test
   is not sufficient (IEEE unordered handling), *SECOND_TEST and/or
   *BYPASS_TEST receive auxiliary condition rtxen; both are cleared first.  */
8950 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
8951 rtx *second_test, rtx *bypass_test)
8953 enum machine_mode fpcmp_mode, intcmp_mode;
8955 int cost = ix86_fp_comparison_cost (code);
8956 enum rtx_code bypass_code, first_code, second_code;
8958 fpcmp_mode = ix86_fp_compare_mode (code);
8959 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
/* Out-parameters default to "no extra test needed".  */
8962 *second_test = NULL_RTX;
8964 *bypass_test = NULL_RTX;
8966 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8968 /* Do fcomi/sahf based test when profitable. */
/* The caller may pass NULL second_test/bypass_test; in that case this fast
   path is only taken when the corresponding code is NIL (not required).  */
8969 if ((bypass_code == NIL || bypass_test)
8970 && (second_code == NIL || second_test)
8971 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8975 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8976 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
/* sahf path: store the FPU status word into a fresh HImode scratch via
   fnstsw, then copy AH into EFLAGS with sahf.  */
8982 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8983 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8985 scratch = gen_reg_rtx (HImode);
8986 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8987 emit_insn (gen_x86_sahf_1 (scratch));
8990 /* The FP codes work out to act like unsigned. */
8991 intcmp_mode = fpcmp_mode;
8993 if (bypass_code != NIL)
8994 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8995 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8997 if (second_code != NIL)
8998 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8999 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9004 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
9005 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9006 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9008 scratch = gen_reg_rtx (HImode);
9009 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9011 /* In the unordered case, we have to check C2 for NaN's, which
9012 doesn't happen to work out to anything nice combination-wise.
9013 So do some bit twiddling on the value we've got in AH to come
9014 up with an appropriate set of condition codes. */
9016 intcmp_mode = CCNOmode;
/* The 0x45/0x40/0x05/0x01/0x04 masks below test bits of the FPU status
   word copied into the scratch; these appear to be the standard i387
   C0/C2/C3 condition bits -- confirm against i386.md's sahf patterns.  */
9021 if (code == GT || !TARGET_IEEE_FP)
9023 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9028 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9029 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9030 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
9031 intcmp_mode = CCmode;
9037 if (code == LT && TARGET_IEEE_FP)
9039 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9040 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
9041 intcmp_mode = CCmode;
9046 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
9052 if (code == GE || !TARGET_IEEE_FP)
9054 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
9059 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9060 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9067 if (code == LE && TARGET_IEEE_FP)
9069 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9070 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9071 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9072 intcmp_mode = CCmode;
9077 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9083 if (code == EQ && TARGET_IEEE_FP)
9085 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9086 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9087 intcmp_mode = CCmode;
9092 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9099 if (code == NE && TARGET_IEEE_FP)
9101 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9102 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9108 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9114 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9118 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9127 /* Return the test that should be put into the flags user, i.e.
9128 the bcc, scc, or cmov instruction. */
9129 return gen_rtx_fmt_ee (code, VOIDmode,
9130 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Expand a comparison of the global ix86_compare_op0/op1 pair with CODE.
   Dispatches to the FP expander for MODE_FLOAT operands (which may fill
   *SECOND_TEST/*BYPASS_TEST), otherwise to the integer expander.
   NOTE(review): listing is non-contiguous; local declarations and the
   final return are not visible here.  */
9135 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
9138 op0 = ix86_compare_op0;
9139 op1 = ix86_compare_op1;
/* Clear the optional out-parameters before possibly filling them.  */
9142 *second_test = NULL_RTX;
9144 *bypass_test = NULL_RTX;
9146 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9147 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9148 second_test, bypass_test);
9150 ret = ix86_expand_int_compare (code, op0, op1);
9155 /* Return true if the CODE will result in nontrivial jump sequence. */
/* "Nontrivial" means the FP comparison needs a bypass or second test,
   i.e. it cannot be expanded as a single conditional jump.  */
9157 ix86_fp_jump_nontrivial_p (enum rtx_code code)
9159 enum rtx_code bypass_code, first_code, second_code;
9162 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9163 return bypass_code != NIL || second_code != NIL;
/* Emit a conditional branch to LABEL comparing ix86_compare_op0/op1
   with CODE.  Switches on the operands' mode: simple modes use a single
   IF_THEN_ELSE jump; FP modes may emit a compound insn so the optimizers
   see one jump; DImode (on 32-bit) is split into compare+branch pairs.
   NOTE(review): listing is non-contiguous; several case labels, braces
   and fallthrough paths are not visible here.  */
9167 ix86_expand_branch (enum rtx_code code, rtx label)
9171 switch (GET_MODE (ix86_compare_op0))
/* Simple case: one compare, one conditional jump.  */
9177 tmp = ix86_expand_compare (code, NULL, NULL);
9178 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9179 gen_rtx_LABEL_REF (VOIDmode, label),
9181 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9190 enum rtx_code bypass_code, first_code, second_code;
9192 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9195 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9197 /* Check whether we will use the natural sequence with one jump. If
9198 so, we can expand jump early. Otherwise delay expansion by
9199 creating compound insn to not confuse optimizers. */
9200 if (bypass_code == NIL && second_code == NIL
9203 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9204 gen_rtx_LABEL_REF (VOIDmode, label),
/* Delayed expansion: build a PARALLEL of the jump plus clobbers of the
   FP flag registers (hard regs 17/18) and, without fcomi, an HImode
   scratch for fnstsw.  */
9209 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9210 ix86_compare_op0, ix86_compare_op1);
9211 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9212 gen_rtx_LABEL_REF (VOIDmode, label),
9214 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9216 use_fcomi = ix86_use_fcomi_compare (code);
9217 vec = rtvec_alloc (3 + !use_fcomi);
9218 RTVEC_ELT (vec, 0) = tmp;
9220 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9222 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9225 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9227 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9235 /* Expand DImode branch into multiple compare+branch. */
9237 rtx lo[2], hi[2], label2;
9238 enum rtx_code code1, code2, code3;
/* Canonicalize: constant operand goes second; swap the code to match.  */
9240 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9242 tmp = ix86_compare_op0;
9243 ix86_compare_op0 = ix86_compare_op1;
9244 ix86_compare_op1 = tmp;
9245 code = swap_condition (code);
9247 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9248 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9250 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9251 avoid two branches. This costs one extra insn, so disable when
9252 optimizing for size. */
9254 if ((code == EQ || code == NE)
9256 || hi[1] == const0_rtx || lo[1] == const0_rtx))
/* XOR each half only when the other operand's half is nonzero.  */
9261 if (hi[1] != const0_rtx)
9262 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9263 NULL_RTX, 0, OPTAB_WIDEN);
9266 if (lo[1] != const0_rtx)
9267 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9268 NULL_RTX, 0, OPTAB_WIDEN);
9270 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9271 NULL_RTX, 0, OPTAB_WIDEN);
/* Recurse: compare the combined word against zero.  */
9273 ix86_compare_op0 = tmp;
9274 ix86_compare_op1 = const0_rtx;
9275 ix86_expand_branch (code, label);
9279 /* Otherwise, if we are doing less-than or greater-or-equal-than,
9280 op1 is a constant and the low word is zero, then we can just
9281 examine the high word. */
9283 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9286 case LT: case LTU: case GE: case GEU:
9287 ix86_compare_op0 = hi[0];
9288 ix86_compare_op1 = hi[1];
9289 ix86_expand_branch (code, label);
9295 /* Otherwise, we need two or three jumps. */
9297 label2 = gen_label_rtx ();
9300 code2 = swap_condition (code);
9301 code3 = unsigned_condition (code);
9305 case LT: case GT: case LTU: case GTU:
9308 case LE: code1 = LT; code2 = GT; break;
9309 case GE: code1 = GT; code2 = LT; break;
9310 case LEU: code1 = LTU; code2 = GTU; break;
9311 case GEU: code1 = GTU; code2 = LTU; break;
9313 case EQ: code1 = NIL; code2 = NE; break;
9314 case NE: code2 = NIL; break;
9322 * if (hi(a) < hi(b)) goto true;
9323 * if (hi(a) > hi(b)) goto false;
9324 * if (lo(a) < lo(b)) goto true;
/* Emit the high-word branches, then the unsigned low-word branch.  */
9328 ix86_compare_op0 = hi[0];
9329 ix86_compare_op1 = hi[1];
9332 ix86_expand_branch (code1, label);
9334 ix86_expand_branch (code2, label2);
9336 ix86_compare_op0 = lo[0];
9337 ix86_compare_op1 = lo[1];
9338 ix86_expand_branch (code3, label);
9341 emit_label (label2);
9350 /* Split branch based on floating point condition. */
/* Emit up to three conditional jumps implementing an FP branch:
   an optional bypass jump (NaN handling), the main jump, and an
   optional second jump.  TARGET1/TARGET2 are the taken/fallthrough
   targets; when TARGET2 is not pc_rtx the condition is reversed so
   the fallthrough is pc.  Branch probabilities from
   split_branch_probability are attached via REG_BR_PROB notes.
   NOTE(review): listing is non-contiguous; swaps and some declarations
   are not visible here.  */
9352 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
9353 rtx target1, rtx target2, rtx tmp)
9356 rtx label = NULL_RTX;
9358 int bypass_probability = -1, second_probability = -1, probability = -1;
9361 if (target2 != pc_rtx)
9364 code = reverse_condition_maybe_unordered (code);
9369 condition = ix86_expand_fp_compare (code, op1, op2,
9370 tmp, &second, &bypass);
9372 if (split_branch_probability >= 0)
9374 /* Distribute the probabilities across the jumps.
9375 Assume the BYPASS and SECOND to be always test
9377 probability = split_branch_probability;
9379 /* Value of 1 is low enough to make no need for probability
9380 to be updated. Later we may run some experiments and see
9381 if unordered values are more frequent in practice. */
9383 bypass_probability = 1;
9385 second_probability = 1;
/* Bypass jump skips over the main jump to a fresh label.  */
9387 if (bypass != NULL_RTX)
9389 label = gen_label_rtx ();
9390 i = emit_jump_insn (gen_rtx_SET
9392 gen_rtx_IF_THEN_ELSE (VOIDmode,
9394 gen_rtx_LABEL_REF (VOIDmode,
9397 if (bypass_probability >= 0)
9399 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9400 GEN_INT (bypass_probability),
/* Main conditional jump.  */
9403 i = emit_jump_insn (gen_rtx_SET
9405 gen_rtx_IF_THEN_ELSE (VOIDmode,
9406 condition, target1, target2)));
9407 if (probability >= 0)
9409 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9410 GEN_INT (probability),
/* Optional second jump for compound FP conditions.  */
9412 if (second != NULL_RTX)
9414 i = emit_jump_insn (gen_rtx_SET
9416 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9418 if (second_probability >= 0)
9420 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9421 GEN_INT (second_probability),
9424 if (label != NULL_RTX)
/* Expand a setcc of condition CODE into QImode DEST.  Returns 0 (FAIL)
   for 32-bit DImode compares, 1 (DONE) otherwise.  When the FP compare
   produced a second/bypass test, combine the two QImode flags with
   and/ior.  A REG_EQUAL note describing the comparison is attached.
   NOTE(review): listing is non-contiguous; abort paths and some
   assignments are not visible here.  */
9429 ix86_expand_setcc (enum rtx_code code, rtx dest)
9431 rtx ret, tmp, tmpreg, equiv;
9432 rtx second_test, bypass_test;
9434 if (GET_MODE (ix86_compare_op0) == DImode
9436 return 0; /* FAIL */
9438 if (GET_MODE (dest) != QImode)
9441 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9442 PUT_MODE (ret, QImode);
9447 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9448 if (bypass_test || second_test)
9450 rtx test = second_test;
9452 rtx tmp2 = gen_reg_rtx (QImode);
/* Bypass tests are combined with AND of the reversed condition;
   second tests with IOR (see the and/ior emission below).  */
9459 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9461 PUT_MODE (test, QImode);
9462 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9465 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9467 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9470 /* Attach a REG_EQUAL note describing the comparison result. */
9471 equiv = simplify_gen_relational (code, QImode,
9472 GET_MODE (ix86_compare_op0),
9473 ix86_compare_op0, ix86_compare_op1);
9474 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
9476 return 1; /* DONE */
9479 /* Expand comparison setting or clearing carry flag. Return true when
9480 successful and set pop for the operation. */
/* Converts CODE comparing OP0/OP1 into an equivalent LTU/GEU test so the
   result can be consumed via the carry flag (adc/sbb).  Integer compares
   are rewritten by adjusting OP1 and CODE; FP compares are tried through
   ix86_expand_fp_compare and accepted only if they land on LTU/GEU.
   NOTE(review): listing is non-contiguous; several case labels, returns
   and swap statements are not visible here.  */
9482 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
9484 enum machine_mode mode =
9485 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9487 /* Do not handle DImode compares that go through a special path. Also we can't
9488 deal with FP compares yet. This is possible to add. */
9489 if ((mode == DImode && !TARGET_64BIT))
9491 if (FLOAT_MODE_P (mode))
9493 rtx second_test = NULL, bypass_test = NULL;
9494 rtx compare_op, compare_seq;
9496 /* Shortcut: following common codes never translate into carry flag compares. */
9497 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9498 || code == ORDERED || code == UNORDERED)
9501 /* These comparisons require zero flag; swap operands so they won't. */
9502 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9508 code = swap_condition (code);
9511 /* Try to expand the comparison and verify that we end up with carry flag
9512 based comparison. This fails to be true only when we decide to expand
9513 comparison using arithmetic that is not too common scenario. */
9515 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9516 &second_test, &bypass_test);
9517 compare_seq = get_insns ();
/* Reject compound tests -- they can't be expressed via carry alone.  */
9520 if (second_test || bypass_test)
9522 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9523 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9524 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9526 code = GET_CODE (compare_op);
9527 if (code != LTU && code != GEU)
9529 emit_insn (compare_seq);
9533 if (!INTEGRAL_MODE_P (mode))
9541 /* Convert a==0 into (unsigned)a<1. */
9544 if (op1 != const0_rtx)
9547 code = (code == EQ ? LTU : GEU);
9550 /* Convert a>b into b<a or a>=b-1. */
9553 if (GET_CODE (op1) == CONST_INT)
9555 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9556 /* Bail out on overflow. We still can swap operands but that
9557 would force loading of the constant into register. */
9558 if (op1 == const0_rtx
9559 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9561 code = (code == GTU ? GEU : LTU);
9568 code = (code == GTU ? LTU : GEU);
9572 /* Convert a>=0 into (unsigned)a<0x80000000. */
9575 if (mode == DImode || op1 != const0_rtx)
9577 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9578 code = (code == LT ? GEU : LTU);
9582 if (mode == DImode || op1 != constm1_rtx)
9584 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9585 code = (code == LE ? GEU : LTU);
9591 /* Swapping operands may cause constant to appear as first operand. */
9592 if (!nonimmediate_operand (op0, VOIDmode))
9596 op0 = force_reg (mode, op0);
9598 ix86_compare_op0 = op0;
9599 ix86_compare_op1 = op1;
/* Final sanity check: the expanded compare must really be LTU/GEU.  */
9600 *pop = ix86_expand_compare (code, NULL, NULL);
9601 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
/* Expand an integer conditional move: operands[0] = operands[1] (a
   comparison of ix86_compare_op0/op1) ? operands[2] : operands[3].
   Returns 1 (DONE) when insns were emitted, 0 (FAIL) to let generic
   code handle it.  Tries, in order: branchless sbb/setcc arithmetic
   for constant arms, lea-based forms, masking tricks, and finally
   real cmov insns.
   NOTE(review): listing is non-contiguous; many braces, else branches
   and helper statements are not visible here, so comments describe
   only the visible structure.  */
9607 ix86_expand_int_movcc (rtx operands[])
9609 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9610 rtx compare_seq, compare_op;
9611 rtx second_test, bypass_test;
9612 enum machine_mode mode = GET_MODE (operands[0]);
9613 bool sign_bit_compare_p = false;;
9616 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9617 compare_seq = get_insns ();
9620 compare_code = GET_CODE (compare_op);
/* x<0 / x>=0 (or the constm1 equivalents) can be done with shifts.  */
9622 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9623 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9624 sign_bit_compare_p = true;
9626 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9627 HImode insns, we'd be swallowed in word prefix ops. */
9629 if ((mode != HImode || TARGET_FAST_PREFIX)
9630 && (mode != DImode || TARGET_64BIT)
9631 && GET_CODE (operands[2]) == CONST_INT
9632 && GET_CODE (operands[3]) == CONST_INT)
9634 rtx out = operands[0];
9635 HOST_WIDE_INT ct = INTVAL (operands[2]);
9636 HOST_WIDE_INT cf = INTVAL (operands[3]);
9640 /* Sign bit compares are better done using shifts than we do by using
9642 if (sign_bit_compare_p
9643 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9644 ix86_compare_op1, &compare_op))
9646 /* Detect overlap between destination and compare sources. */
9649 if (!sign_bit_compare_p)
9653 compare_code = GET_CODE (compare_op);
9655 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9656 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9659 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9662 /* To simplify rest of code, restrict to the GEU case. */
9663 if (compare_code == LTU)
9665 HOST_WIDE_INT tmp = ct;
9668 compare_code = reverse_condition (compare_code);
9669 code = reverse_condition (code);
9674 PUT_CODE (compare_op,
9675 reverse_condition_maybe_unordered
9676 (GET_CODE (compare_op)));
9678 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
/* Need a fresh temp if the destination overlaps the compare inputs.  */
9682 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9683 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9684 tmp = gen_reg_rtx (mode);
/* sbb-style: materialize 0/-1 from the carry flag.  */
9687 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9689 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9693 if (code == GT || code == GE)
9694 code = reverse_condition (code);
9697 HOST_WIDE_INT tmp = ct;
9702 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9703 ix86_compare_op1, VOIDmode, 0, -1);
/* Fold the 0/-1 mask into the constant arms via add/or/not/and.  */
9716 tmp = expand_simple_binop (mode, PLUS,
9718 copy_rtx (tmp), 1, OPTAB_DIRECT);
9729 tmp = expand_simple_binop (mode, IOR,
9731 copy_rtx (tmp), 1, OPTAB_DIRECT);
9733 else if (diff == -1 && ct)
9743 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9745 tmp = expand_simple_binop (mode, PLUS,
9746 copy_rtx (tmp), GEN_INT (cf),
9747 copy_rtx (tmp), 1, OPTAB_DIRECT);
9755 * andl cf - ct, dest
9765 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9768 tmp = expand_simple_binop (mode, AND,
9770 gen_int_mode (cf - ct, mode),
9771 copy_rtx (tmp), 1, OPTAB_DIRECT);
9773 tmp = expand_simple_binop (mode, PLUS,
9774 copy_rtx (tmp), GEN_INT (ct),
9775 copy_rtx (tmp), 1, OPTAB_DIRECT);
9778 if (!rtx_equal_p (tmp, out))
9779 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9781 return 1; /* DONE */
/* Normalize so ct/cf and the condition agree (swap + reverse).  */
9787 tmp = ct, ct = cf, cf = tmp;
9789 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9791 /* We may be reversing unordered compare to normal compare, that
9792 is not valid in general (we may convert non-trapping condition
9793 to trapping one), however on i386 we currently emit all
9794 comparisons unordered. */
9795 compare_code = reverse_condition_maybe_unordered (compare_code);
9796 code = reverse_condition_maybe_unordered (code);
9800 compare_code = reverse_condition (compare_code);
9801 code = reverse_condition (code);
9806 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9807 && GET_CODE (ix86_compare_op1) == CONST_INT)
9809 if (ix86_compare_op1 == const0_rtx
9810 && (code == LT || code == GE))
9811 compare_code = code;
9812 else if (ix86_compare_op1 == constm1_rtx)
9816 else if (code == GT)
9821 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9822 if (compare_code != NIL
9823 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9824 && (cf == -1 || ct == -1))
9826 /* If lea code below could be used, only optimize
9827 if it results in a 2 insn sequence. */
9829 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9830 || diff == 3 || diff == 5 || diff == 9)
9831 || (compare_code == LT && ct == -1)
9832 || (compare_code == GE && cf == -1))
9835 * notl op1 (if necessary)
9843 code = reverse_condition (code);
9846 out = emit_store_flag (out, code, ix86_compare_op0,
9847 ix86_compare_op1, VOIDmode, 0, -1);
9849 out = expand_simple_binop (mode, IOR,
9851 out, 1, OPTAB_DIRECT);
9852 if (out != operands[0])
9853 emit_move_insn (operands[0], out);
9855 return 1; /* DONE */
/* lea-encodable differences: setcc then scale/offset with lea.  */
9860 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9861 || diff == 3 || diff == 5 || diff == 9)
9862 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9863 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
9869 * lea cf(dest*(ct-cf)),dest
9873 * This also catches the degenerate setcc-only case.
9879 out = emit_store_flag (out, code, ix86_compare_op0,
9880 ix86_compare_op1, VOIDmode, 0, 1);
9883 /* On x86_64 the lea instruction operates on Pmode, so we need
9884 to get arithmetics done in proper mode to match. */
9886 tmp = copy_rtx (out);
9890 out1 = copy_rtx (out);
9891 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9895 tmp = gen_rtx_PLUS (mode, tmp, out1);
9901 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9904 if (!rtx_equal_p (tmp, out))
9907 out = force_operand (tmp, copy_rtx (out));
9909 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
9911 if (!rtx_equal_p (out, operands[0]))
9912 emit_move_insn (operands[0], copy_rtx (out));
9914 return 1; /* DONE */
9918 * General case: Jumpful:
9919 * xorl dest,dest cmpl op1, op2
9920 * cmpl op1, op2 movl ct, dest
9922 * decl dest movl cf, dest
9923 * andl (cf-ct),dest 1:
9928 * This is reasonably steep, but branch mispredict costs are
9929 * high on modern cpus, so consider failing only if optimizing
9933 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9934 && BRANCH_COST >= 2)
9940 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9941 /* We may be reversing unordered compare to normal compare,
9942 that is not valid in general (we may convert non-trapping
9943 condition to trapping one), however on i386 we currently
9944 emit all comparisons unordered. */
9945 code = reverse_condition_maybe_unordered (code);
9948 code = reverse_condition (code);
9949 if (compare_code != NIL)
9950 compare_code = reverse_condition (compare_code);
9954 if (compare_code != NIL)
9956 /* notl op1 (if needed)
9961 For x < 0 (resp. x <= -1) there will be no notl,
9962 so if possible swap the constants to get rid of the
9964 True/false will be -1/0 while code below (store flag
9965 followed by decrement) is 0/-1, so the constants need
9966 to be exchanged once more. */
9968 if (compare_code == GE || !cf)
9970 code = reverse_condition (code);
9975 HOST_WIDE_INT tmp = cf;
9980 out = emit_store_flag (out, code, ix86_compare_op0,
9981 ix86_compare_op1, VOIDmode, 0, -1);
9985 out = emit_store_flag (out, code, ix86_compare_op0,
9986 ix86_compare_op1, VOIDmode, 0, 1);
9988 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
9989 copy_rtx (out), 1, OPTAB_DIRECT);
9992 out = expand_simple_binop (mode, AND, copy_rtx (out),
9993 gen_int_mode (cf - ct, mode),
9994 copy_rtx (out), 1, OPTAB_DIRECT);
9996 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
9997 copy_rtx (out), 1, OPTAB_DIRECT);
9998 if (!rtx_equal_p (out, operands[0]))
9999 emit_move_insn (operands[0], copy_rtx (out));
10001 return 1; /* DONE */
10005 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10007 /* Try a few things more with specific constants and a variable. */
10010 rtx var, orig_out, out, tmp;
10012 if (BRANCH_COST <= 2)
10013 return 0; /* FAIL */
10015 /* If one of the two operands is an interesting constant, load a
10016 constant with the above and mask it in with a logical operation. */
10018 if (GET_CODE (operands[2]) == CONST_INT)
10021 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
10022 operands[3] = constm1_rtx, op = and_optab;
10023 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
10024 operands[3] = const0_rtx, op = ior_optab;
10026 return 0; /* FAIL */
10028 else if (GET_CODE (operands[3]) == CONST_INT)
10031 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
10032 operands[2] = constm1_rtx, op = and_optab;
10033 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
10034 operands[2] = const0_rtx, op = ior_optab;
10036 return 0; /* FAIL */
10039 return 0; /* FAIL */
10041 orig_out = operands[0];
10042 tmp = gen_reg_rtx (mode);
10045 /* Recurse to get the constant loaded. */
10046 if (ix86_expand_int_movcc (operands) == 0)
10047 return 0; /* FAIL */
10049 /* Mask in the interesting variable. */
10050 out = expand_binop (mode, op, var, tmp, orig_out, 0,
10052 if (!rtx_equal_p (out, orig_out))
10053 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
10055 return 1; /* DONE */
10059 * For comparison with above,
/* cmov path: force operands into registers as the insn requires.  */
10069 if (! nonimmediate_operand (operands[2], mode))
10070 operands[2] = force_reg (mode, operands[2]);
10071 if (! nonimmediate_operand (operands[3], mode))
10072 operands[3] = force_reg (mode, operands[3]);
10074 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10076 rtx tmp = gen_reg_rtx (mode);
10077 emit_move_insn (tmp, operands[3]);
10080 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10082 rtx tmp = gen_reg_rtx (mode);
10083 emit_move_insn (tmp, operands[2]);
10087 if (! register_operand (operands[2], VOIDmode)
10089 || ! register_operand (operands[3], VOIDmode)))
10090 operands[2] = force_reg (mode, operands[2]);
10093 && ! register_operand (operands[3], VOIDmode))
10094 operands[3] = force_reg (mode, operands[3]);
10096 emit_insn (compare_seq);
10097 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10098 gen_rtx_IF_THEN_ELSE (mode,
10099 compare_op, operands[2],
/* Extra cmovs merge in the bypass/second test results.  */
10102 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10103 gen_rtx_IF_THEN_ELSE (mode,
10105 copy_rtx (operands[3]),
10106 copy_rtx (operands[0]))));
10108 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10109 gen_rtx_IF_THEN_ELSE (mode,
10111 copy_rtx (operands[2]),
10112 copy_rtx (operands[0]))));
10114 return 1; /* DONE */
/* Expand a floating point conditional move of operands[0..3].  First
   tries SSE min/max when the comparison cross-matches the move arms,
   then the SSE masked-move patterns, and finally fcmov with setcc
   fixups for conditions fcmov cannot express.
   NOTE(review): listing is non-contiguous; returns, swaps and some
   emissions are not visible here.  */
10118 ix86_expand_fp_movcc (rtx operands[])
10120 enum rtx_code code;
10122 rtx compare_op, second_test, bypass_test;
10124 /* For SF/DFmode conditional moves based on comparisons
10125 in same mode, we may want to use SSE min/max instructions. */
10126 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
10127 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
10128 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
10129 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
10130 && (!TARGET_IEEE_FP
10131 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
10132 /* We may be called from the post-reload splitter. */
10133 && (!REG_P (operands[0])
10134 || SSE_REG_P (operands[0])
10135 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
10137 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
10138 code = GET_CODE (operands[1]);
10140 /* See if we have (cross) match between comparison operands and
10141 conditional move operands. */
10142 if (rtx_equal_p (operands[2], op1))
10147 code = reverse_condition_maybe_unordered (code);
10149 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
10151 /* Check for min operation. */
10152 if (code == LT || code == UNLE)
/* min: force dest/op0 into registers as the SSE insn needs.  */
10160 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10161 if (memory_operand (op0, VOIDmode))
10162 op0 = force_reg (GET_MODE (operands[0]), op0);
10163 if (GET_MODE (operands[0]) == SFmode)
10164 emit_insn (gen_minsf3 (operands[0], op0, op1));
10166 emit_insn (gen_mindf3 (operands[0], op0, op1));
10169 /* Check for max operation. */
10170 if (code == GT || code == UNGE)
10178 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10179 if (memory_operand (op0, VOIDmode))
10180 op0 = force_reg (GET_MODE (operands[0]), op0);
10181 if (GET_MODE (operands[0]) == SFmode)
10182 emit_insn (gen_maxsf3 (operands[0], op0, op1));
10184 emit_insn (gen_maxdf3 (operands[0], op0, op1));
10188 /* Manage condition to be sse_comparison_operator. In case we are
10189 in non-ieee mode, try to canonicalize the destination operand
10190 to be first in the comparison - this helps reload to avoid extra
10192 if (!sse_comparison_operator (operands[1], VOIDmode)
10193 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
10195 rtx tmp = ix86_compare_op0;
10196 ix86_compare_op0 = ix86_compare_op1;
10197 ix86_compare_op1 = tmp;
10198 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
10199 VOIDmode, ix86_compare_op0,
10202 /* Similarly try to manage result to be first operand of conditional
10203 move. We also don't support the NE comparison on SSE, so try to
10205 if ((rtx_equal_p (operands[0], operands[3])
10206 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
10207 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
10209 rtx tmp = operands[2];
10210 operands[2] = operands[3];
10212 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10213 (GET_CODE (operands[1])),
10214 VOIDmode, ix86_compare_op0,
10217 if (GET_MODE (operands[0]) == SFmode)
10218 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
10219 operands[2], operands[3],
10220 ix86_compare_op0, ix86_compare_op1));
10222 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
10223 operands[2], operands[3],
10224 ix86_compare_op0, ix86_compare_op1));
10228 /* The floating point conditional move instructions don't directly
10229 support conditions resulting from a signed integer comparison. */
10231 code = GET_CODE (operands[1]);
10232 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10234 /* The floating point conditional move instructions don't directly
10235 support signed integer comparisons. */
/* Fall back: materialize the condition with setcc into a QImode reg,
   then compare that register against zero for fcmov.  */
10237 if (!fcmov_comparison_operator (compare_op, VOIDmode))
10239 if (second_test != NULL || bypass_test != NULL)
10241 tmp = gen_reg_rtx (QImode);
10242 ix86_expand_setcc (code, tmp);
10244 ix86_compare_op0 = tmp;
10245 ix86_compare_op1 = const0_rtx;
10246 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10248 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10250 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10251 emit_move_insn (tmp, operands[3]);
10254 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10256 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10257 emit_move_insn (tmp, operands[2]);
10261 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10262 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10267 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10268 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10273 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10274 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10282 /* Expand conditional increment or decrement using adb/sbb instructions.
10283 The default case using setcc followed by the conditional move can be
10284 done by generic code. */
/* operands[0] = operands[2] +/- 1 depending on the comparison in
   operands[1]; only +1/-1 steps (operands[3]) are handled.  Requires
   the comparison to be expressible via the carry flag.  Returns 1
   (DONE) on success.
   NOTE(review): listing is non-contiguous; the FAIL returns and the
   fpcmp adjustment are not fully visible here.  */
10286 ix86_expand_int_addcc (rtx operands[])
10288 enum rtx_code code = GET_CODE (operands[1]);
10290 rtx val = const0_rtx;
10291 bool fpcmp = false;
10292 enum machine_mode mode = GET_MODE (operands[0]);
10294 if (operands[3] != const1_rtx
10295 && operands[3] != constm1_rtx)
10297 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10298 ix86_compare_op1, &compare_op))
10300 code = GET_CODE (compare_op);
10302 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10303 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10306 code = ix86_fp_compare_code_to_integer (code);
10313 PUT_CODE (compare_op,
10314 reverse_condition_maybe_unordered
10315 (GET_CODE (compare_op)));
10317 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10319 PUT_MODE (compare_op, mode);
10321 /* Construct either adc or sbb insn. */
/* sbb when the carry sense matches a -1 step; adc otherwise.  */
10322 if ((code == LTU) == (operands[3] == constm1_rtx))
10324 switch (GET_MODE (operands[0]))
10327 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10330 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10333 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10336 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10344 switch (GET_MODE (operands[0]))
10347 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10350 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10353 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10356 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10362 return 1; /* DONE */
10366 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10367 works for floating point parameters and nonoffsetable memories.
10368 For pushes, it returns just stack offsets; the values will be saved
10369 in the right order. Maximally three parts are generated. */
/* NOTE(review): listing is non-contiguous; the size computation, abort
   paths and the final return are only partly visible here.  Splits
   OPERAND of MODE into word-sized pieces in PARTS[0..2]: registers by
   consecutive hard regnos, offsettable MEMs by address adjustment,
   CONST_DOUBLEs via REAL_VALUE_TO_TARGET_* conversion.  */
10372 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
10377 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
10379 size = (GET_MODE_SIZE (mode) + 4) / 8;
10381 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10383 if (size < 2 || size > 3)
10386 /* Optimize constant pool reference to immediates. This is used by fp
10387 moves, that force all constants to memory to allow combining. */
10388 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10390 rtx tmp = maybe_get_pool_constant (operand);
10395 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10397 /* The only non-offsetable memories we handle are pushes. */
10398 if (! push_operand (operand, VOIDmode))
10401 operand = copy_rtx (operand);
10402 PUT_MODE (operand, Pmode);
10403 parts[0] = parts[1] = parts[2] = operand;
10405 else if (!TARGET_64BIT)
10407 if (mode == DImode)
10408 split_di (&operand, 1, &parts[0], &parts[1]);
10411 if (REG_P (operand))
/* Consecutive-hard-reg split is only valid before reload completes.  */
10413 if (!reload_completed)
10415 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10416 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10418 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10420 else if (offsettable_memref_p (operand))
10422 operand = adjust_address (operand, SImode, 0);
10423 parts[0] = operand;
10424 parts[1] = adjust_address (operand, SImode, 4);
10426 parts[2] = adjust_address (operand, SImode, 8);
10428 else if (GET_CODE (operand) == CONST_DOUBLE)
10433 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10437 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10438 parts[2] = gen_int_mode (l[2], SImode);
10441 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10446 parts[1] = gen_int_mode (l[1], SImode);
10447 parts[0] = gen_int_mode (l[0], SImode);
/* 64-bit target: split TImode and the extended FP modes instead.  */
10455 if (mode == TImode)
10456 split_ti (&operand, 1, &parts[0], &parts[1]);
10457 if (mode == XFmode || mode == TFmode)
10459 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
10460 if (REG_P (operand))
10462 if (!reload_completed)
10464 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10465 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
10467 else if (offsettable_memref_p (operand))
10469 operand = adjust_address (operand, DImode, 0);
10470 parts[0] = operand;
10471 parts[1] = adjust_address (operand, upper_mode, 8);
10473 else if (GET_CODE (operand) == CONST_DOUBLE)
10478 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10479 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10480 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10481 if (HOST_BITS_PER_WIDE_INT >= 64)
10484 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10485 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10488 parts[0] = immed_double_const (l[0], l[1], DImode);
10489 if (upper_mode == SImode)
10490 parts[1] = gen_int_mode (l[2], SImode);
10491 else if (HOST_BITS_PER_WIDE_INT >= 64)
10494 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
10495 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
10498 parts[1] = immed_double_const (l[2], l[3], DImode);
10508 /* Emit insns to perform a move or push of DI, DF, and XF values.
10509 Return false when normal moves are needed; true when all required
10510 insns have been emitted. Operands 2-4 contain the input values
10511 in the correct order; operands 5-7 contain the output values. */
/* Splits one multi-word move into word-sized part moves, choosing an
   order in which no source word is clobbered before it is read.
   part[0][] holds the destination words and part[1][] the source words,
   as produced by ix86_split_to_parts below.  */
10514 ix86_split_long_move (rtx operands[])
10519 int collisions = 0;
10520 enum machine_mode mode = GET_MODE (operands[0]);
10522 /* The DFmode expanders may ask us to move double.
10523 For 64bit target this is single move. By hiding the fact
10524 here we simplify i386.md splitters. */
10525 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10527 /* Optimize constant pool reference to immediates. This is used by
10528 fp moves, that force all constants to memory to allow combining. */
10530 if (GET_CODE (operands[1]) == MEM
10531 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10532 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10533 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10534 if (push_operand (operands[0], VOIDmode))
10536 operands[0] = copy_rtx (operands[0]);
10537 PUT_MODE (operands[0], Pmode);
10540 operands[0] = gen_lowpart (DImode, operands[0]);
10541 operands[1] = gen_lowpart (DImode, operands[1]);
10542 emit_move_insn (operands[0], operands[1]);
10546 /* The only non-offsettable memory we handle is push. */
10547 if (push_operand (operands[0], VOIDmode))
10549 else if (GET_CODE (operands[0]) == MEM
10550 && ! offsettable_memref_p (operands[0]))
/* Break both operands into their word-sized parts.  */
10553 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10554 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10556 /* When emitting push, take care for source operands on the stack. */
10557 if (push && GET_CODE (operands[1]) == MEM
10558 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10561 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10562 XEXP (part[1][2], 0));
10563 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10564 XEXP (part[1][1], 0));
10567 /* We need to do copy in the right order in case an address register
10568 of the source overlaps the destination. */
10569 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10571 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10573 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10576 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10579 /* Collision in the middle part can be handled by reordering. */
10580 if (collisions == 1 && nparts == 3
10581 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10584 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10585 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10588 /* If there are more collisions, we can't handle it by reordering.
10589 Do an lea to the last part and use only one colliding move. */
10590 else if (collisions > 1)
10596 base = part[0][nparts - 1];
10598 /* Handle the case when the last part isn't valid for lea.
10599 Happens in 64-bit mode storing the 12-byte XFmode. */
10600 if (GET_MODE (base) != Pmode)
10601 base = gen_rtx_REG (Pmode, REGNO (base));
/* Materialize the source address in BASE, then rewrite all source
   parts as offsets from BASE so only this one move can collide.  */
10603 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10604 part[1][0] = replace_equiv_address (part[1][0], base);
10605 part[1][1] = replace_equiv_address (part[1][1],
10606 plus_constant (base, UNITS_PER_WORD));
10608 part[1][2] = replace_equiv_address (part[1][2],
10609 plus_constant (base, 8));
/* NOTE(review): the 4-byte stack adjustment presumably pads a pushed
   12-byte XFmode out to 16 bytes -- confirm against the i386.md push
   patterns.  */
10619 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
10620 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
10621 emit_move_insn (part[0][2], part[1][2]);
10626 /* In 64bit mode we don't have 32bit push available. In case this is
10627 register, it is OK - we will just use larger counterpart. We also
10628 retype memory - these comes from attempt to avoid REX prefix on
10629 moving of second half of TFmode value. */
10630 if (GET_MODE (part[1][1]) == SImode)
10632 if (GET_CODE (part[1][1]) == MEM)
10633 part[1][1] = adjust_address (part[1][1], DImode, 0);
10634 else if (REG_P (part[1][1]))
10635 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10638 if (GET_MODE (part[1][0]) == SImode)
10639 part[1][0] = part[1][1];
10642 emit_move_insn (part[0][1], part[1][1]);
10643 emit_move_insn (part[0][0], part[1][0]);
10647 /* Choose correct order to not overwrite the source before it is copied. */
10648 if ((REG_P (part[0][0])
10649 && REG_P (part[1][1])
10650 && (REGNO (part[0][0]) == REGNO (part[1][1])
10652 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10654 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
/* Reversed (high-to-low) order: assign the operand slots so the
   highest word is copied first.  */
10658 operands[2] = part[0][2];
10659 operands[3] = part[0][1];
10660 operands[4] = part[0][0];
10661 operands[5] = part[1][2];
10662 operands[6] = part[1][1];
10663 operands[7] = part[1][0];
10667 operands[2] = part[0][1];
10668 operands[3] = part[0][0];
10669 operands[5] = part[1][1];
10670 operands[6] = part[1][0];
/* Natural (low-to-high) order.  */
10677 operands[2] = part[0][0];
10678 operands[3] = part[0][1];
10679 operands[4] = part[0][2];
10680 operands[5] = part[1][0];
10681 operands[6] = part[1][1];
10682 operands[7] = part[1][2];
10686 operands[2] = part[0][0];
10687 operands[3] = part[0][1];
10688 operands[5] = part[1][0];
10689 operands[6] = part[1][1];
/* Emit the part moves in the order chosen above.  */
10692 emit_move_insn (operands[2], operands[5]);
10693 emit_move_insn (operands[3], operands[6]);
10695 emit_move_insn (operands[4], operands[7]);
/* Split a DImode left shift (operands[0] = operands[1] << operands[2])
   into SImode instructions on the low/high word pair.  SCRATCH, when
   non-NULL, may be clobbered for the variable-count fixup when new
   pseudos cannot be created.  */
10701 ix86_split_ashldi (rtx *operands, rtx scratch)
10703 rtx low[2], high[2];
10706 if (GET_CODE (operands[2]) == CONST_INT)
10708 split_di (operands, 2, low, high);
10709 count = INTVAL (operands[2]) & 63;
/* Constant count >= 32: the result's high word is the low source word
   shifted by (count - 32); the low word becomes zero.  */
10713 emit_move_insn (high[0], low[1]);
10714 emit_move_insn (low[0], const0_rtx);
10717 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
/* Constant count < 32: double-precision shift via SHLD on the high
   word plus an ordinary SHL of the low word.  */
10721 if (!rtx_equal_p (operands[0], operands[1]))
10722 emit_move_insn (operands[0], operands[1]);
10723 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)))
10724 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
/* Variable count: SHLD/SHL by CL, then fix up the count >= 32 case.  */
10729 if (!rtx_equal_p (operands[0], operands[1]))
10730 emit_move_insn (operands[0], operands[1]);
10732 split_di (operands, 1, low, high);
10734 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10735 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
/* With CMOV, adjust via conditional moves against a zero scratch;
   otherwise fall back to the branching adjust pattern.  */
10737 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10739 if (! no_new_pseudos)
10740 scratch = force_reg (SImode, const0_rtx);
10742 emit_move_insn (scratch, const0_rtx);
10744 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10748 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
/* Split a DImode arithmetic right shift into SImode instructions.
   SCRATCH, when non-NULL, may be clobbered for the variable-count
   fixup when new pseudos cannot be created.  */
10753 ix86_split_ashrdi (rtx *operands, rtx scratch)
10755 rtx low[2], high[2];
10758 if (GET_CODE (operands[2]) == CONST_INT)
10760 split_di (operands, 2, low, high);
10761 count = INTVAL (operands[2]) & 63;
/* Constant count >= 32: low word is the shifted high source word; the
   high word is filled with copies of the sign bit (shift by 31).  */
10765 emit_move_insn (low[0], high[1]);
/* Before reload we can sign-fill high directly from low; after reload
   the register pair may overlap, so copy first.  */
10767 if (! reload_completed)
10768 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10771 emit_move_insn (high[0], low[0]);
10772 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10776 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
/* Constant count < 32: SHRD into the low word plus SAR of the high.  */
10780 if (!rtx_equal_p (operands[0], operands[1]))
10781 emit_move_insn (operands[0], operands[1]);
10782 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10783 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
/* Variable count: SHRD/SAR by CL, then fix up the count >= 32 case
   using a scratch holding the sign extension of the high word.  */
10788 if (!rtx_equal_p (operands[0], operands[1]))
10789 emit_move_insn (operands[0], operands[1]);
10791 split_di (operands, 1, low, high);
10793 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10794 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10796 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10798 if (! no_new_pseudos)
10799 scratch = gen_reg_rtx (SImode);
10800 emit_move_insn (scratch, high[0]);
10801 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10802 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10806 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
/* Split a DImode logical right shift into SImode instructions.
   SCRATCH, when non-NULL, may be clobbered for the variable-count
   fixup when new pseudos cannot be created.  */
10811 ix86_split_lshrdi (rtx *operands, rtx scratch)
10813 rtx low[2], high[2];
10816 if (GET_CODE (operands[2]) == CONST_INT)
10818 split_di (operands, 2, low, high);
10819 count = INTVAL (operands[2]) & 63;
/* Constant count >= 32: low word is the shifted high source word; the
   high word becomes zero.  */
10823 emit_move_insn (low[0], high[1]);
10824 emit_move_insn (high[0], const0_rtx);
10827 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
/* Constant count < 32: SHRD into the low word plus SHR of the high.  */
10831 if (!rtx_equal_p (operands[0], operands[1]))
10832 emit_move_insn (operands[0], operands[1]);
10833 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10834 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
/* Variable count: SHRD/SHR by CL, then fix up the count >= 32 case.  */
10839 if (!rtx_equal_p (operands[0], operands[1]))
10840 emit_move_insn (operands[0], operands[1]);
10842 split_di (operands, 1, low, high);
10844 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10845 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10847 /* Heh. By reversing the arguments, we can reuse this pattern. */
10848 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10850 if (! no_new_pseudos)
10851 scratch = force_reg (SImode, const0_rtx);
10853 emit_move_insn (scratch, const0_rtx);
10855 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10859 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10863 /* Helper function for the string operations below. Test VARIABLE whether
10864 it is aligned to VALUE bytes: emits (VARIABLE & VALUE) and a jump to
the returned label taken when the masked bits are zero, i.e. when the
alignment test succeeds. The caller emits the "unaligned" fixup code
before binding the label. */
10866 ix86_expand_aligntest (rtx variable, int value)
10868 rtx label = gen_label_rtx ();
10869 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
/* Mask in the variable's own mode (SImode or DImode).  */
10870 if (GET_MODE (variable) == DImode)
10871 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10873 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10874 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10879 /* Decrement COUNTREG by VALUE, emitting an add of -VALUE in the
counter's own mode (SImode or DImode). */
10881 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
10883 if (GET_MODE (countreg) == DImode)
10884 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10886 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10889 /* Zero extend possibly SImode EXP to Pmode register. */
10891 ix86_zero_extend_to_Pmode (rtx exp)
/* VOIDmode means EXP is a constant; just force it into a Pmode reg.  */
10894 if (GET_MODE (exp) == VOIDmode)
10895 return force_reg (Pmode, exp);
/* Already word-mode: copy into a fresh register.  */
10896 if (GET_MODE (exp) == Pmode)
10897 return copy_to_mode_reg (Pmode, exp);
/* Otherwise zero-extend SImode up to DImode (Pmode on 64-bit).  */
10898 r = gen_reg_rtx (Pmode);
10899 emit_insn (gen_zero_extendsidi2 (r, exp));
10903 /* Expand string move (memcpy) operation. Use i386 string operations when
10904 profitable. expand_clrstr contains similar code.
DST and SRC are MEM operands; COUNT_EXP gives the byte count and
ALIGN_EXP the known common alignment, either as CONST_INTs or
run-time values. NOTE(review): the return statements are elided in
this excerpt -- presumably nonzero means the copy was emitted inline;
confirm against the callers in i386.md. */
10906 ix86_expand_movstr (rtx dst, rtx src, rtx count_exp, rtx align_exp)
10908 rtx srcreg, destreg, countreg;
10909 enum machine_mode counter_mode;
10910 HOST_WIDE_INT align = 0;
10911 unsigned HOST_WIDE_INT count = 0;
10914 if (GET_CODE (align_exp) == CONST_INT)
10915 align = INTVAL (align_exp);
10917 /* Can't use any of this if the user has appropriated esi or edi. */
10918 if (global_regs[4] || global_regs[5])
10921 /* This simple hack avoids all inlining code and simplifies code below. */
10922 if (!TARGET_ALIGN_STRINGOPS)
10925 if (GET_CODE (count_exp) == CONST_INT)
10927 count = INTVAL (count_exp);
10928 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
10932 /* Figure out proper mode for counter. For 32bits it is always SImode,
10933 for 64bits use SImode when possible, otherwise DImode.
10934 Set count to number of bytes copied when known at compile time. */
10935 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10936 || x86_64_zero_extended_value (count_exp))
10937 counter_mode = SImode;
10939 counter_mode = DImode;
/* Only SImode and DImode counters are supported.  */
10943 if (counter_mode != SImode && counter_mode != DImode)
10946 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10947 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
/* String insns require the direction flag cleared.  */
10949 emit_insn (gen_cld ());
10951 /* When optimizing for size emit simple rep ; movsb instruction for
10952 counts not divisible by 4. */
10954 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10956 countreg = ix86_zero_extend_to_Pmode (count_exp);
10958 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
10959 destreg, srcreg, countreg));
10961 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
10962 destreg, srcreg, countreg));
10965 /* For constant aligned (or small unaligned) copies use rep movsl
10966 followed by code copying the rest. For PentiumPro ensure 8 byte
10967 alignment to allow rep movsl acceleration. */
10969 else if (count != 0
10971 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10972 || optimize_size || count < (unsigned int) 64)
10974 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
/* rep movs{l,q} for the bulk; count is scaled to words below.  */
10975 if (count & ~(size - 1))
10977 countreg = copy_to_mode_reg (counter_mode,
10978 GEN_INT ((count >> (size == 4 ? 2 : 3))
10979 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10980 countreg = ix86_zero_extend_to_Pmode (countreg);
10984 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
10985 destreg, srcreg, countreg));
10987 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
10988 destreg, srcreg, countreg));
10991 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
10992 destreg, srcreg, countreg));
/* Copy the remaining 0-7 tail bytes with single string moves.  */
10994 if (size == 8 && (count & 0x04))
10995 emit_insn (gen_strmovsi (destreg, srcreg));
10997 emit_insn (gen_strmovhi (destreg, srcreg));
10999 emit_insn (gen_strmovqi (destreg, srcreg));
11001 /* The generic code based on the glibc implementation:
11002 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
11003 allowing accelerated copying there)
11004 - copy the data using rep movsl
11005 - copy the rest. */
11010 int desired_alignment = (TARGET_PENTIUMPRO
11011 && (count == 0 || count >= (unsigned int) 260)
11012 ? 8 : UNITS_PER_WORD);
11014 /* In case we don't know anything about the alignment, default to
11015 library version, since it is usually equally fast and result in
11018 Also emit call when we know that the count is large and call overhead
11019 will not be important. */
11020 if (!TARGET_INLINE_ALL_STRINGOPS
11021 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11027 if (TARGET_SINGLE_STRINGOP)
11028 emit_insn (gen_cld ());
11030 countreg2 = gen_reg_rtx (Pmode);
11031 countreg = copy_to_mode_reg (counter_mode, count_exp);
11033 /* We don't use loops to align destination and to copy parts smaller
11034 than 4 bytes, because gcc is able to optimize such code better (in
11035 the case the destination or the count really is aligned, gcc is often
11036 able to predict the branches) and also it is friendlier to the
11037 hardware branch prediction.
11039 Using loops is beneficial for generic case, because we can
11040 handle small counts using the loops. Many CPUs (such as Athlon)
11041 have large REP prefix setup costs.
11043 This is quite costly. Maybe we can revisit this decision later or
11044 add some customizability to this code. */
/* Skip the alignment prologue entirely for counts smaller than the
   desired alignment.  */
11046 if (count == 0 && align < desired_alignment)
11048 label = gen_label_rtx ();
11049 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11050 LEU, 0, counter_mode, 1, label);
/* Peel 1-, 2- and 4-byte moves until DESTREG is aligned.  */
11054 rtx label = ix86_expand_aligntest (destreg, 1);
11055 emit_insn (gen_strmovqi (destreg, srcreg));
11056 ix86_adjust_counter (countreg, 1);
11057 emit_label (label);
11058 LABEL_NUSES (label) = 1;
11062 rtx label = ix86_expand_aligntest (destreg, 2);
11063 emit_insn (gen_strmovhi (destreg, srcreg));
11064 ix86_adjust_counter (countreg, 2);
11065 emit_label (label);
11066 LABEL_NUSES (label) = 1;
11068 if (align <= 4 && desired_alignment > 4)
11070 rtx label = ix86_expand_aligntest (destreg, 4);
11071 emit_insn (gen_strmovsi (destreg, srcreg));
11072 ix86_adjust_counter (countreg, 4);
11073 emit_label (label);
11074 LABEL_NUSES (label) = 1;
11077 if (label && desired_alignment > 4 && !TARGET_64BIT)
11079 emit_label (label);
11080 LABEL_NUSES (label) = 1;
11083 if (!TARGET_SINGLE_STRINGOP)
11084 emit_insn (gen_cld ());
/* Bulk copy: scale the byte count down to words and rep movs.  */
11087 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11089 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
11090 destreg, srcreg, countreg2));
11094 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
11095 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
11096 destreg, srcreg, countreg2));
11101 emit_label (label);
11102 LABEL_NUSES (label) = 1;
/* Epilogue: copy any 4-, 2- and 1-byte remainders, testing COUNTREG
   at run time when the count is not a compile-time constant.  */
11104 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11105 emit_insn (gen_strmovsi (destreg, srcreg));
11106 if ((align <= 4 || count == 0) && TARGET_64BIT)
11108 rtx label = ix86_expand_aligntest (countreg, 4);
11109 emit_insn (gen_strmovsi (destreg, srcreg));
11110 emit_label (label);
11111 LABEL_NUSES (label) = 1;
11113 if (align > 2 && count != 0 && (count & 2))
11114 emit_insn (gen_strmovhi (destreg, srcreg));
11115 if (align <= 2 || count == 0)
11117 rtx label = ix86_expand_aligntest (countreg, 2);
11118 emit_insn (gen_strmovhi (destreg, srcreg));
11119 emit_label (label);
11120 LABEL_NUSES (label) = 1;
11122 if (align > 1 && count != 0 && (count & 1))
11123 emit_insn (gen_strmovqi (destreg, srcreg));
11124 if (align <= 1 || count == 0)
11126 rtx label = ix86_expand_aligntest (countreg, 1);
11127 emit_insn (gen_strmovqi (destreg, srcreg));
11128 emit_label (label);
11129 LABEL_NUSES (label) = 1;
/* Re-attach memory attributes of DST/SRC to the emitted sequence.  */
11133 insns = get_insns ();
11136 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
11141 /* Expand string clear operation (bzero). Use i386 string operations when
11142 profitable. expand_movstr contains similar code.
Note: despite its name, the SRC parameter is the destination MEM being
cleared (its address is loaded into DESTREG below). COUNT_EXP is the
byte count, ALIGN_EXP the known alignment. */
11144 ix86_expand_clrstr (rtx src, rtx count_exp, rtx align_exp)
11146 rtx destreg, zeroreg, countreg;
11147 enum machine_mode counter_mode;
11148 HOST_WIDE_INT align = 0;
11149 unsigned HOST_WIDE_INT count = 0;
11151 if (GET_CODE (align_exp) == CONST_INT)
11152 align = INTVAL (align_exp);
11154 /* Can't use any of this if the user has appropriated esi. */
11155 if (global_regs[4])
11158 /* This simple hack avoids all inlining code and simplifies code below. */
11159 if (!TARGET_ALIGN_STRINGOPS)
11162 if (GET_CODE (count_exp) == CONST_INT)
11164 count = INTVAL (count_exp);
11165 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11168 /* Figure out proper mode for counter. For 32bits it is always SImode,
11169 for 64bits use SImode when possible, otherwise DImode.
11170 Set count to number of bytes copied when known at compile time. */
11171 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11172 || x86_64_zero_extended_value (count_exp))
11173 counter_mode = SImode;
11175 counter_mode = DImode;
11177 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
/* String insns require the direction flag cleared.  */
11179 emit_insn (gen_cld ());
11181 /* When optimizing for size emit simple rep ; movsb instruction for
11182 counts not divisible by 4. */
11184 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11186 countreg = ix86_zero_extend_to_Pmode (count_exp);
11187 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11189 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
11190 destreg, countreg));
11192 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
11193 destreg, countreg));
11195 else if (count != 0
11197 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11198 || optimize_size || count < (unsigned int) 64)
11200 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11201 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
/* rep stos{l,q} for the bulk; count is scaled to words below.  */
11202 if (count & ~(size - 1))
11204 countreg = copy_to_mode_reg (counter_mode,
11205 GEN_INT ((count >> (size == 4 ? 2 : 3))
11206 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11207 countreg = ix86_zero_extend_to_Pmode (countreg);
11211 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
11212 destreg, countreg));
11214 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
11215 destreg, countreg));
11218 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
11219 destreg, countreg));
/* Store the remaining 0-7 tail bytes with single string stores.  */
11221 if (size == 8 && (count & 0x04))
11222 emit_insn (gen_strsetsi (destreg,
11223 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11225 emit_insn (gen_strsethi (destreg,
11226 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11228 emit_insn (gen_strsetqi (destreg,
11229 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11235 /* Compute desired alignment of the string operation. */
11236 int desired_alignment = (TARGET_PENTIUMPRO
11237 && (count == 0 || count >= (unsigned int) 260)
11238 ? 8 : UNITS_PER_WORD);
11240 /* In case we don't know anything about the alignment, default to
11241 library version, since it is usually equally fast and result in
11244 Also emit call when we know that the count is large and call overhead
11245 will not be important. */
11246 if (!TARGET_INLINE_ALL_STRINGOPS
11247 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11250 if (TARGET_SINGLE_STRINGOP)
11251 emit_insn (gen_cld ());
11253 countreg2 = gen_reg_rtx (Pmode);
11254 countreg = copy_to_mode_reg (counter_mode, count_exp);
11255 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
/* Skip the alignment prologue entirely for counts smaller than the
   desired alignment.  */
11257 if (count == 0 && align < desired_alignment)
11259 label = gen_label_rtx ();
11260 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11261 LEU, 0, counter_mode, 1, label);
/* Peel 1-, 2- and 4-byte stores until DESTREG is aligned.  */
11265 rtx label = ix86_expand_aligntest (destreg, 1);
11266 emit_insn (gen_strsetqi (destreg,
11267 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11268 ix86_adjust_counter (countreg, 1);
11269 emit_label (label);
11270 LABEL_NUSES (label) = 1;
11274 rtx label = ix86_expand_aligntest (destreg, 2);
11275 emit_insn (gen_strsethi (destreg,
11276 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11277 ix86_adjust_counter (countreg, 2);
11278 emit_label (label);
11279 LABEL_NUSES (label) = 1;
11281 if (align <= 4 && desired_alignment > 4)
11283 rtx label = ix86_expand_aligntest (destreg, 4);
11284 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
11285 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11287 ix86_adjust_counter (countreg, 4);
11288 emit_label (label);
11289 LABEL_NUSES (label) = 1;
11292 if (label && desired_alignment > 4 && !TARGET_64BIT)
11294 emit_label (label);
11295 LABEL_NUSES (label) = 1;
11299 if (!TARGET_SINGLE_STRINGOP)
11300 emit_insn (gen_cld ());
/* Bulk clear: scale the byte count down to words and rep stos.  */
11303 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11305 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
11306 destreg, countreg2));
11310 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
11311 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
11312 destreg, countreg2));
11316 emit_label (label);
11317 LABEL_NUSES (label) = 1;
/* Epilogue: store any 4-, 2- and 1-byte remainders, testing COUNTREG
   at run time when the count is not a compile-time constant.  */
11320 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11321 emit_insn (gen_strsetsi (destreg,
11322 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11323 if (TARGET_64BIT && (align <= 4 || count == 0))
11325 rtx label = ix86_expand_aligntest (countreg, 4);
11326 emit_insn (gen_strsetsi (destreg,
11327 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11328 emit_label (label);
11329 LABEL_NUSES (label) = 1;
11331 if (align > 2 && count != 0 && (count & 2))
11332 emit_insn (gen_strsethi (destreg,
11333 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11334 if (align <= 2 || count == 0)
11336 rtx label = ix86_expand_aligntest (countreg, 2);
11337 emit_insn (gen_strsethi (destreg,
11338 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11339 emit_label (label);
11340 LABEL_NUSES (label) = 1;
11342 if (align > 1 && count != 0 && (count & 1))
11343 emit_insn (gen_strsetqi (destreg,
11344 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11345 if (align <= 1 || count == 0)
11347 rtx label = ix86_expand_aligntest (countreg, 1);
11348 emit_insn (gen_strsetqi (destreg,
11349 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11350 emit_label (label);
11351 LABEL_NUSES (label) = 1;
11356 /* Expand strlen. OUT receives the length; SRC is the string MEM;
EOSCHAR is the terminator character (const0_rtx for plain strlen);
ALIGN is the known alignment of SRC. */
11358 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
11360 rtx addr, scratch1, scratch2, scratch3, scratch4;
11362 /* The generic case of strlen expander is long. Avoid its
11363 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
11365 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11366 && !TARGET_INLINE_ALL_STRINGOPS
11368 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11371 addr = force_reg (Pmode, XEXP (src, 0));
11372 scratch1 = gen_reg_rtx (Pmode);
/* Unrolled word-at-a-time path: NUL-terminated string only.  */
11374 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11377 /* Well it seems that some optimizer does not combine a call like
11378 foo(strlen(bar), strlen(bar));
11379 when the move and the subtraction is done here. It does calculate
11380 the length just once when these instructions are done inside of
11381 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
11382 often used and I use one fewer register for the lifetime of
11383 output_strlen_unroll() this is better. */
11385 emit_move_insn (out, addr);
11387 ix86_expand_strlensi_unroll_1 (out, align);
11389 /* strlensi_unroll_1 returns the address of the zero at the end of
11390 the string, like memchr(), so compute the length by subtracting
11391 the start address. */
11393 emit_insn (gen_subdi3 (out, out, addr));
11395 emit_insn (gen_subsi3 (out, out, addr));
/* repne scasb path: search with count -1, then length = ~count - 1.  */
11399 scratch2 = gen_reg_rtx (Pmode);
11400 scratch3 = gen_reg_rtx (Pmode);
11401 scratch4 = force_reg (Pmode, constm1_rtx);
11403 emit_move_insn (scratch3, addr);
11404 eoschar = force_reg (QImode, eoschar);
11406 emit_insn (gen_cld ());
11409 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
11410 align, scratch4, scratch3));
11411 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11412 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11416 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
11417 align, scratch4, scratch3));
11418 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11419 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11425 /* Expand the appropriate insns for doing strlen if not just doing
11428 out = result, initialized with the start address
11429 align_rtx = alignment of the address.
11430 scratch = scratch register, initialized with the startaddress when
11431 not aligned, otherwise undefined
11433 This is just the body. It needs the initializations mentioned above and
11434 some address computing at the end. These things are done in i386.md. */
11437 ix86_expand_strlensi_unroll_1 (rtx out, rtx align_rtx)
11441 rtx align_2_label = NULL_RTX;
11442 rtx align_3_label = NULL_RTX;
11443 rtx align_4_label = gen_label_rtx ();
11444 rtx end_0_label = gen_label_rtx ();
11446 rtx tmpreg = gen_reg_rtx (SImode);
11447 rtx scratch = gen_reg_rtx (SImode);
11451 if (GET_CODE (align_rtx) == CONST_INT)
11452 align = INTVAL (align_rtx);
11454 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11456 /* Is there a known alignment and is it less than 4? */
11459 rtx scratch1 = gen_reg_rtx (Pmode);
11460 emit_move_insn (scratch1, out);
11461 /* Is there a known alignment and is it not 2? */
11464 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11465 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11467 /* Leave just the 3 lower bits. */
11468 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11469 NULL_RTX, 0, OPTAB_WIDEN);
/* Dispatch on (address & 3): 0, 2, or 3 skip some byte checks.  */
11471 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11472 Pmode, 1, align_4_label);
11473 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
11474 Pmode, 1, align_2_label);
11475 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
11476 Pmode, 1, align_3_label);
11480 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11481 check if is aligned to 4 - byte. */
11483 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
11484 NULL_RTX, 0, OPTAB_WIDEN);
11486 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11487 Pmode, 1, align_4_label);
11490 mem = gen_rtx_MEM (QImode, out);
11492 /* Now compare the bytes. */
11494 /* Compare the first n unaligned byte on a byte per byte basis. */
11495 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11496 QImode, 1, end_0_label);
11498 /* Increment the address. */
11500 emit_insn (gen_adddi3 (out, out, const1_rtx));
11502 emit_insn (gen_addsi3 (out, out, const1_rtx));
11504 /* Not needed with an alignment of 2 */
11507 emit_label (align_2_label);
11509 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11513 emit_insn (gen_adddi3 (out, out, const1_rtx));
11515 emit_insn (gen_addsi3 (out, out, const1_rtx));
11517 emit_label (align_3_label);
11520 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11524 emit_insn (gen_adddi3 (out, out, const1_rtx));
11526 emit_insn (gen_addsi3 (out, out, const1_rtx));
11529 /* Generate loop to check 4 bytes at a time. It is not a good idea to
11530 align this loop. It gives only huge programs, but does not help to
11532 emit_label (align_4_label);
11534 mem = gen_rtx_MEM (SImode, out);
11535 emit_move_insn (scratch, mem);
11537 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11539 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11541 /* This formula yields a nonzero result iff one of the bytes is zero.
11542 This saves three branches inside loop and many cycles. */
/* Classic zero-byte detector: (x - 0x01010101) & ~x & 0x80808080.  */
11544 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11545 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11546 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11547 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11548 gen_int_mode (0x80808080, SImode)));
11549 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
/* A zero byte was found; locate it within the word.  CMOV variant:
   branch-free selection of the half-word containing the zero.  */
11554 rtx reg = gen_reg_rtx (SImode);
11555 rtx reg2 = gen_reg_rtx (Pmode);
11556 emit_move_insn (reg, tmpreg);
11557 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11559 /* If zero is not in the first two bytes, move two bytes forward. */
11560 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11561 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11562 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11563 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11564 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11567 /* Emit lea manually to avoid clobbering of flags. */
11568 emit_insn (gen_rtx_SET (SImode, reg2,
11569 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
11571 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11572 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11573 emit_insn (gen_rtx_SET (VOIDmode, out,
11574 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
/* Non-CMOV variant: use an explicit conditional jump instead.  */
11581 rtx end_2_label = gen_label_rtx ();
11582 /* Is zero in the first two bytes? */
11584 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11585 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11586 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11587 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11588 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11590 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11591 JUMP_LABEL (tmp) = end_2_label;
11593 /* Not in the first two. Move two bytes forward. */
11594 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11596 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
11598 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
11600 emit_label (end_2_label);
11604 /* Avoid branch in fixing the byte. */
/* NOTE(review): hard register 17 here is presumably FLAGS_REG (used
   symbolically elsewhere in this file) -- confirm.  */
11605 tmpreg = gen_lowpart (QImode, tmpreg);
11606 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11607 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11609 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11611 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
11613 emit_label (end_0_label);
/* Emit a call to FNADDR (a MEM wrapping the function address).
   RETVAL, when non-NULL, receives the call's value.  CALLARG1 is the
   argument-bytes rtx of the CALL rtx; POP is the number of bytes the
   callee pops (const0_rtx treated as none); SIBCALL is nonzero for a
   tail call.  */
11617 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
11618 rtx callarg2 ATTRIBUTE_UNUSED,
11619 rtx pop, int sibcall)
11621 rtx use = NULL, call;
11623 if (pop == const0_rtx)
11625 if (TARGET_64BIT && pop)
/* Darwin: route symbol calls through the machopic indirection.  */
11629 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11630 fnaddr = machopic_indirect_call_target (fnaddr);
11632 /* Static functions and indirect calls don't need the pic register. */
11633 if (! TARGET_64BIT && flag_pic
11634 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11635 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
11636 use_reg (&use, pic_offset_table_rtx);
/* Load AL with CALLARG2 and record the use -- presumably the x86-64
   varargs convention where AL carries the number of vector registers
   used; confirm against the psABI.  */
11638 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11640 rtx al = gen_rtx_REG (QImode, 0);
11641 emit_move_insn (al, callarg2);
11642 use_reg (&use, al);
11644 #endif /* TARGET_MACHO */
/* Force an address that is not directly callable into a register.  */
11646 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11648 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11649 fnaddr = gen_rtx_MEM (QImode, fnaddr);
/* 64-bit indirect sibcalls must go through a call-clobbered register
   that is not used for argument passing: R11.  */
11651 if (sibcall && TARGET_64BIT
11652 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11655 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11656 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
11657 emit_move_insn (fnaddr, addr);
11658 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11661 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11663 call = gen_rtx_SET (VOIDmode, retval, call);
/* Fold the callee's stack pop into the call pattern as a parallel
   stack-pointer adjustment.  */
11666 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11667 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11668 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11671 call = emit_call_insn (call);
11673 CALL_INSN_FUNCTION_USAGE (call) = use;
11677 /* Clear stack slot assignments remembered from previous functions.
11678 This is called from INIT_EXPANDERS once before RTL is emitted for each
/* Allocate and return a fresh, zeroed per-function machine_function.
   Called via init_machine_status at the start of each function.  */
11681 static struct machine_function *
11682 ix86_init_machine_status (void)
11684 struct machine_function *f;
11686 f = ggc_alloc_cleared (sizeof (struct machine_function));
/* -1 marks the fast-prologue/epilogue register-count cache as not yet
   computed.  */
11687 f->use_fast_prologue_epilogue_nregs = -1;
11692 /* Return a MEM corresponding to a stack slot with mode MODE.
11693 Allocate a new slot if necessary.
11695 The RTL for a function can have several slots available: N is
11696 which slot to use. */
/* Return a MEM for stack slot number N of mode MODE, reusing a previously
   assigned slot when one exists and allocating (GC'd) a new entry
   otherwise.  */
11699 assign_386_stack_local (enum machine_mode mode, int n)
11701 struct stack_local_entry *s;
/* Reject out-of-range slot numbers.  */
11703 if (n < 0 || n >= MAX_386_STACK_LOCALS)
/* Linear search of the per-function slot list for an existing match.  */
11706 for (s = ix86_stack_locals; s; s = s->next)
11707 if (s->mode == mode && s->n == n)
11710 s = (struct stack_local_entry *)
11711 ggc_alloc (sizeof (struct stack_local_entry));
11714 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
/* Push the new entry on the front of the list.  */
11716 s->next = ix86_stack_locals;
11717 ix86_stack_locals = s;
11721 /* Construct the SYMBOL_REF for the tls_get_addr function. */
/* Cached SYMBOL_REF for the TLS resolver function, built lazily.  */
11723 static GTY(()) rtx ix86_tls_symbol;
11725 ix86_tls_get_addr (void)
11728 if (!ix86_tls_symbol)
/* 32-bit GNU TLS uses the triple-underscore entry point; everything
   else uses the standard __tls_get_addr.  */
11730 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11731 (TARGET_GNU_TLS && !TARGET_64BIT)
11732 ? "___tls_get_addr"
11733 : "__tls_get_addr");
11736 return ix86_tls_symbol;
11739 /* Calculate the length of the memory address in the instruction
11740 encoding. Does not include the one-byte modrm, opcode, or prefix. */
/* Return the number of bytes needed to encode the memory address ADDR,
   excluding the one-byte modrm, opcode and prefixes.  Auto-modify
   addresses are handled specially before decomposition.  */
11743 memory_address_length (rtx addr)
11745 struct ix86_address parts;
11746 rtx base, index, disp;
11749 if (GET_CODE (addr) == PRE_DEC
11750 || GET_CODE (addr) == POST_INC
11751 || GET_CODE (addr) == PRE_MODIFY
11752 || GET_CODE (addr) == POST_MODIFY)
11755 if (! ix86_decompose_address (addr, &parts))
11759 index = parts.index;
11764 - esp as the base always wants an index,
11765 - ebp as the base always wants a displacement. */
11767 /* Register Indirect. */
11768 if (base && !index && !disp)
11770 /* esp (for its index) and ebp (for its displacement) need
11771 the two-byte modrm form. */
11772 if (addr == stack_pointer_rtx
11773 || addr == arg_pointer_rtx
11774 || addr == frame_pointer_rtx
11775 || addr == hard_frame_pointer_rtx)
11779 /* Direct Addressing. */
11780 else if (disp && !base && !index)
11785 /* Find the length of the displacement constant. */
/* 'K' accepts constants that fit a signed 8-bit displacement.  */
11788 if (GET_CODE (disp) == CONST_INT
11789 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
11795 /* ebp always wants a displacement. */
11796 else if (base == hard_frame_pointer_rtx)
11799 /* An index requires the two-byte modrm form... */
11801 /* ...like esp, which always wants an index. */
11802 || base == stack_pointer_rtx
11803 || base == arg_pointer_rtx
11804 || base == frame_pointer_rtx)
11811 /* Compute default value for "length_immediate" attribute. When SHORTFORM
11812 is set, expect that insn have 8bit immediate alternative. */
/* Compute the default "length_immediate" attribute for INSN: the encoded
   size in bytes of its constant operand, if any.  When SHORTFORM is set
   the insn is assumed to have an 8-bit immediate alternative, so 'K'
   (signed 8-bit) constants count as one byte.  */
11814 ix86_attr_length_immediate_default (rtx insn, int shortform)
11818 extract_insn_cached (insn);
/* Scan operands for a constant; only one immediate is expected.  */
11819 for (i = recog_data.n_operands - 1; i >= 0; --i)
11820 if (CONSTANT_P (recog_data.operand[i]))
11825 && GET_CODE (recog_data.operand[i]) == CONST_INT
11826 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
/* Otherwise the immediate width follows the insn's mode attribute.  */
11830 switch (get_attr_mode (insn))
11841 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
11846 fatal_insn ("unknown insn mode", insn);
11852 /* Compute default value for "length_address" attribute. */
/* Compute the default "length_address" attribute for INSN: the encoded
   length of its memory address, if it has one.  LEA is special-cased
   because its "memory" operand is the SET_SRC of the pattern.  */
11854 ix86_attr_length_address_default (rtx insn)
11858 if (get_attr_type (insn) == TYPE_LEA)
11860 rtx set = PATTERN (insn);
11861 if (GET_CODE (set) == SET)
11863 else if (GET_CODE (set) == PARALLEL
11864 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
11865 set = XVECEXP (set, 0, 0);
11868 #ifdef ENABLE_CHECKING
11874 return memory_address_length (SET_SRC (set));
/* Non-LEA: find the first MEM operand and measure its address.  */
11877 extract_insn_cached (insn);
11878 for (i = recog_data.n_operands - 1; i >= 0; --i)
11879 if (GET_CODE (recog_data.operand[i]) == MEM)
11881 return memory_address_length (XEXP (recog_data.operand[i], 0));
11887 /* Return the maximum number of instructions a cpu can issue. */
/* Return the maximum number of insns the scheduled CPU can issue per
   cycle, keyed off the tuning target.  */
11890 ix86_issue_rate (void)
11894 case PROCESSOR_PENTIUM:
11898 case PROCESSOR_PENTIUMPRO:
11899 case PROCESSOR_PENTIUM4:
11900 case PROCESSOR_ATHLON:
11909 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
11910 by DEP_INSN and nothing set by DEP_INSN. */
11913 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
11917 /* Simplify the test for uninteresting insns. */
11918 if (insn_type != TYPE_SETCC
11919 && insn_type != TYPE_ICMOV
11920 && insn_type != TYPE_FCMOV
11921 && insn_type != TYPE_IBR)
11924 if ((set = single_set (dep_insn)) != 0)
11926 set = SET_DEST (set);
11929 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
11930 && XVECLEN (PATTERN (dep_insn), 0) == 2
11931 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
11932 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
11934 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11935 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11940 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11943 /* This test is true if the dependent insn reads the flags but
11944 not any other potentially set register. */
11945 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11948 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11954 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11955 address with operands set by DEP_INSN. */
/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
   address whose operands are modified by DEP_INSN (an address-generation
   interlock).  LEA is special-cased: its "address" is the SET_SRC.  */
11958 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
11962 if (insn_type == TYPE_LEA
11965 addr = PATTERN (insn);
11966 if (GET_CODE (addr) == SET)
11968 else if (GET_CODE (addr) == PARALLEL
11969 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11970 addr = XVECEXP (addr, 0, 0);
11973 addr = SET_SRC (addr);
/* Non-LEA: locate the first MEM operand's address.  */
11978 extract_insn_cached (insn);
11979 for (i = recog_data.n_operands - 1; i >= 0; --i)
11980 if (GET_CODE (recog_data.operand[i]) == MEM)
11982 addr = XEXP (recog_data.operand[i], 0);
/* True iff any register mentioned in the address is set by DEP_INSN.  */
11989 return modified_in_p (addr, dep_insn);
/* Scheduler hook: adjust COST of the dependency LINK between INSN and
   DEP_INSN for the tuning target.  Returns the adjusted cost.  */
11993 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
11995 enum attr_type insn_type, dep_insn_type;
11996 enum attr_memory memory, dep_memory;
11998 int dep_insn_code_number;
12000 /* Anti and output dependencies have zero cost on all CPUs. */
12001 if (REG_NOTE_KIND (link) != 0)
12004 dep_insn_code_number = recog_memoized (dep_insn);
12006 /* If we can't recognize the insns, we can't really do anything. */
12007 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
12010 insn_type = get_attr_type (insn);
12011 dep_insn_type = get_attr_type (dep_insn);
12015 case PROCESSOR_PENTIUM:
12016 /* Address Generation Interlock adds a cycle of latency. */
12017 if (ix86_agi_dependant (insn, dep_insn, insn_type))
12020 /* ??? Compares pair with jump/setcc. */
12021 if (ix86_flags_dependant (insn, dep_insn, insn_type))
12024 /* Floating point stores require value to be ready one cycle earlier. */
12025 if (insn_type == TYPE_FMOV
12026 && get_attr_memory (insn) == MEMORY_STORE
12027 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12031 case PROCESSOR_PENTIUMPRO:
12032 memory = get_attr_memory (insn);
12033 dep_memory = get_attr_memory (dep_insn);
12035 /* Since we can't represent delayed latencies of load+operation,
12036 increase the cost here for non-imov insns. */
12037 if (dep_insn_type != TYPE_IMOV
12038 && dep_insn_type != TYPE_FMOV
12039 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
12042 /* INT->FP conversion is expensive. */
12043 if (get_attr_fp_int_src (dep_insn))
12046 /* There is one cycle extra latency between an FP op and a store. */
12047 if (insn_type == TYPE_FMOV
12048 && (set = single_set (dep_insn)) != NULL_RTX
12049 && (set2 = single_set (insn)) != NULL_RTX
12050 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
12051 && GET_CODE (SET_DEST (set2)) == MEM)
12054 /* Show ability of reorder buffer to hide latency of load by executing
12055 in parallel with previous instruction in case
12056 previous instruction is not needed to compute the address. */
12057 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12058 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12060 /* Claim moves to take one cycle, as core can issue one load
12061 at time and the next load can start cycle later. */
12062 if (dep_insn_type == TYPE_IMOV
12063 || dep_insn_type == TYPE_FMOV)
12071 memory = get_attr_memory (insn);
12072 dep_memory = get_attr_memory (dep_insn);
12073 /* The esp dependency is resolved before the instruction is really
12075 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
12076 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
12079 /* Since we can't represent delayed latencies of load+operation,
12080 increase the cost here for non-imov insns. */
12081 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
12082 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
12084 /* INT->FP conversion is expensive. */
12085 if (get_attr_fp_int_src (dep_insn))
12088 /* Show ability of reorder buffer to hide latency of load by executing
12089 in parallel with previous instruction in case
12090 previous instruction is not needed to compute the address. */
12091 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12092 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12094 /* Claim moves to take one cycle, as core can issue one load
12095 at time and the next load can start cycle later. */
12096 if (dep_insn_type == TYPE_IMOV
12097 || dep_insn_type == TYPE_FMOV)
12106 case PROCESSOR_ATHLON:
12108 memory = get_attr_memory (insn);
12109 dep_memory = get_attr_memory (dep_insn);
12111 /* Show ability of reorder buffer to hide latency of load by executing
12112 in parallel with previous instruction in case
12113 previous instruction is not needed to compute the address. */
12114 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12115 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12117 enum attr_unit unit = get_attr_unit (insn);
12120 /* Because of the difference between the length of integer and
12121 floating unit pipeline preparation stages, the memory operands
12122 for floating point are cheaper.
12124 ??? For Athlon the difference is most probably 2. */
12125 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
12128 loadcost = TARGET_ATHLON ? 2 : 0;
12130 if (cost >= loadcost)
/* Per-cycle scheduling state for the PentiumPro decoder model.
   NOTE(review): other members are elided in this listing.  */
12145 struct ppro_sched_data
/* Number of insns issued in the current cycle.  */
12148 int issued_this_cycle;
12152 static enum attr_ppro_uops
12153 ix86_safe_ppro_uops (rtx insn)
12155 if (recog_memoized (insn) >= 0)
12156 return get_attr_ppro_uops (insn);
12158 return PPRO_UOPS_MANY;
/* Debug helper: print the UIDs of the insns currently occupying the
   three PPro decoder slots to DUMP, one packet per line.  Slot 0 must
   be filled for anything to be printed.  */
12162 ix86_dump_ppro_packet (FILE *dump)
12164 if (ix86_sched_data.ppro.decode[0])
12166 fprintf (dump, "PPRO packet: %d",
12167 INSN_UID (ix86_sched_data.ppro.decode[0]));
12168 if (ix86_sched_data.ppro.decode[1])
12169 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
12170 if (ix86_sched_data.ppro.decode[2])
12171 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
12172 fputc ('\n', dump);
12176 /* We're beginning a new block. Initialize data structures as necessary. */
/* Scheduler hook: reset all per-block scheduling state to zero at the
   start of a new block.  */
12179 ix86_sched_init (FILE *dump ATTRIBUTE_UNUSED,
12180 int sched_verbose ATTRIBUTE_UNUSED,
12181 int veclen ATTRIBUTE_UNUSED)
12183 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
12186 /* Shift INSN to SLOT, and shift everything else down. */
/* Shift the insn at *INSNP to position SLOT in the ready queue, sliding
   the intervening entries down one place.
   NOTE(review): lines are elided in this listing; the visible loop
   copies each successor entry over its predecessor up to SLOT.  */
12189 ix86_reorder_insn (rtx *insnp, rtx *slot)
12195 insnp[0] = insnp[1];
12196 while (++insnp != slot);
/* Reorder the ready queue (READY..E_READY, E_READY = highest priority)
   to match the PPro 4-1-1 decoder template: one multi-uop insn in slot 0
   and single-uop insns in slots 1 and 2.  Records the resulting issue
   count in ix86_sched_data.  */
12202 ix86_sched_reorder_ppro (rtx *ready, rtx *e_ready)
12205 enum attr_ppro_uops cur_uops;
12206 int issued_this_cycle;
12210 /* At this point .ppro.decode contains the state of the three
12211 decoders from last "cycle". That is, those insns that were
12212 actually independent. But here we're scheduling for the
12213 decoder, and we may find things that are decodable in the
12216 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
12217 issued_this_cycle = 0;
12220 cur_uops = ix86_safe_ppro_uops (*insnp);
12222 /* If the decoders are empty, and we've a complex insn at the
12223 head of the priority queue, let it issue without complaint. */
12224 if (decode[0] == NULL)
12226 if (cur_uops == PPRO_UOPS_MANY)
12228 decode[0] = *insnp;
12232 /* Otherwise, search for a 2-4 uop insn to issue. */
12233 while (cur_uops != PPRO_UOPS_FEW)
12235 if (insnp == ready)
12237 cur_uops = ix86_safe_ppro_uops (*--insnp);
12240 /* If so, move it to the head of the line. */
12241 if (cur_uops == PPRO_UOPS_FEW)
12242 ix86_reorder_insn (insnp, e_ready);
12244 /* Issue the head of the queue. */
12245 issued_this_cycle = 1;
12246 decode[0] = *e_ready--;
12249 /* Look for simple insns to fill in the other two slots. */
12250 for (i = 1; i < 3; ++i)
12251 if (decode[i] == NULL)
12253 if (ready > e_ready)
12257 cur_uops = ix86_safe_ppro_uops (*insnp);
12258 while (cur_uops != PPRO_UOPS_ONE)
12260 if (insnp == ready)
12262 cur_uops = ix86_safe_ppro_uops (*--insnp);
12265 /* Found one. Move it to the head of the queue and issue it. */
12266 if (cur_uops == PPRO_UOPS_ONE)
12268 ix86_reorder_insn (insnp, e_ready);
12269 decode[i] = *e_ready--;
12270 issued_this_cycle++;
12274 /* ??? Didn't find one. Ideally, here we would do a lazy split
12275 of 2-uop insns, issue one and queue the other. */
/* Report at least one issue so the scheduler always makes progress.  */
12279 if (issued_this_cycle == 0)
12280 issued_this_cycle = 1;
12281 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
12284 /* We are about to begin issuing insns for this clock cycle.
12285 Override the default sort algorithm to better slot instructions. */
/* Scheduler hook: reorder the ready list before issuing for this cycle.
   Only the PentiumPro model does real reordering; other targets just
   initialize the per-cycle issue count.  Returns the issue rate.  */
12287 ix86_sched_reorder (FILE *dump ATTRIBUTE_UNUSED,
12288 int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
12289 int *n_readyp, int clock_var ATTRIBUTE_UNUSED)
12291 int n_ready = *n_readyp;
12292 rtx *e_ready = ready + n_ready - 1;
12294 /* Make sure to go ahead and initialize key items in
12295 ix86_sched_data if we are not going to bother trying to
12296 reorder the ready queue. */
12299 ix86_sched_data.ppro.issued_this_cycle = 1;
12308 case PROCESSOR_PENTIUMPRO:
12309 ix86_sched_reorder_ppro (ready, e_ready);
12314 return ix86_issue_rate ();
12317 /* We are about to issue INSN. Return the number of insns left on the
12318 ready queue that can be issued this cycle. */
/* Scheduler hook: INSN is about to be issued.  Return how many more
   insns may still issue this cycle.  For PPro this also updates the
   simulated decoder slots and dumps completed packets.  */
12321 ix86_variable_issue (FILE *dump, int sched_verbose, rtx insn,
12322 int can_issue_more)
12328 return can_issue_more - 1;
12330 case PROCESSOR_PENTIUMPRO:
12332 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
/* A many-uop insn occupies the whole decode packet by itself.  */
12334 if (uops == PPRO_UOPS_MANY)
12337 ix86_dump_ppro_packet (dump);
12338 ix86_sched_data.ppro.decode[0] = insn;
12339 ix86_sched_data.ppro.decode[1] = NULL;
12340 ix86_sched_data.ppro.decode[2] = NULL;
12342 ix86_dump_ppro_packet (dump);
12343 ix86_sched_data.ppro.decode[0] = NULL;
/* A few-uop insn must start a new packet in slot 0.  */
12345 else if (uops == PPRO_UOPS_FEW)
12348 ix86_dump_ppro_packet (dump);
12349 ix86_sched_data.ppro.decode[0] = insn;
12350 ix86_sched_data.ppro.decode[1] = NULL;
12351 ix86_sched_data.ppro.decode[2] = NULL;
/* A single-uop insn takes the first free slot; a full packet is
   flushed and reset.  */
12355 for (i = 0; i < 3; ++i)
12356 if (ix86_sched_data.ppro.decode[i] == NULL)
12358 ix86_sched_data.ppro.decode[i] = insn;
12366 ix86_dump_ppro_packet (dump);
12367 ix86_sched_data.ppro.decode[0] = NULL;
12368 ix86_sched_data.ppro.decode[1] = NULL;
12369 ix86_sched_data.ppro.decode[2] = NULL;
12373 return --ix86_sched_data.ppro.issued_this_cycle;
/* Scheduler hook: nonzero when the tuning target has a DFA pipeline
   description (Pentium and Athlon/K8 here).  */
12378 ia32_use_dfa_pipeline_interface (void)
12380 if (TARGET_PENTIUM || TARGET_ATHLON_K8)
12385 /* How many alternative schedules to try. This should be as wide as the
12386 scheduling freedom in the DFA, but no wider. Making this value too
12387 large results extra work for the scheduler. */
/* Scheduler hook: number of alternative schedules to try; Pentium gets
   a non-default lookahead.  */
12390 ia32_multipass_dfa_lookahead (void)
12392 if (ix86_tune == PROCESSOR_PENTIUM)
12399 /* Walk through INSNS and look for MEM references whose address is DSTREG or
12400 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
/* Walk the insn list INSNS and, for every MEM whose address is DSTREG or
   SRCREG, copy the memory attributes from DSTREF/SRCREF respectively
   (delegates the pattern walk to ix86_set_move_mem_attrs_1).  */
12404 ix86_set_move_mem_attrs (rtx insns, rtx dstref, rtx srcref, rtx dstreg,
12409 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
12411 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
12415 /* Subroutine of above to actually do the updating by recursively walking
/* Recursive worker for ix86_set_move_mem_attrs: walk the rtx X and copy
   memory attributes onto any MEM addressed directly by DSTREG or SRCREG.  */
12419 ix86_set_move_mem_attrs_1 (rtx x, rtx dstref, rtx srcref, rtx dstreg,
12422 enum rtx_code code = GET_CODE (x);
12423 const char *format_ptr = GET_RTX_FORMAT (code);
/* Pointer identity is intentional here: only MEMs using exactly the
   given address register rtx are updated.  */
12426 if (code == MEM && XEXP (x, 0) == dstreg)
12427 MEM_COPY_ATTRIBUTES (x, dstref);
12428 else if (code == MEM && XEXP (x, 0) == srcreg)
12429 MEM_COPY_ATTRIBUTES (x, srcref);
/* Recurse into all rtx ('e') and rtx-vector ('E') operands.  */
12431 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
12433 if (*format_ptr == 'e')
12434 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
12436 else if (*format_ptr == 'E')
12437 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12438 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
12443 /* Compute the alignment given to a constant that is being placed in memory.
12444 EXP is the constant and ALIGN is the alignment that the object would
12446 The value of this function is used instead of that alignment to align
/* Return the alignment to use for constant EXP placed in memory, given
   its natural alignment ALIGN (bits).  Doubles get 64-bit and wide
   modes 128-bit alignment; long strings are also bumped.  */
12450 ix86_constant_alignment (tree exp, int align)
12452 if (TREE_CODE (exp) == REAL_CST)
12454 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12456 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12459 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
12466 /* Compute the alignment for a static variable.
12467 TYPE is the data type, and ALIGN is the alignment that
12468 the object would ordinarily have. The value of this function is used
12469 instead of that alignment to align the object. */
/* Return the alignment (bits) for a static variable of TYPE whose
   natural alignment is ALIGN.  Large aggregates and FP/vector types are
   bumped for performance; x86-64 ABI additionally requires big arrays to
   be 16-byte aligned.  */
12472 ix86_data_alignment (tree type, int align)
/* Aggregates of 256 bits or more get extra alignment.  */
12474 if (AGGREGATE_TYPE_P (type)
12475 && TYPE_SIZE (type)
12476 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12477 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12478 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12481 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12482 to 16byte boundary. */
12485 if (AGGREGATE_TYPE_P (type)
12486 && TYPE_SIZE (type)
12487 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12488 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12489 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12493 if (TREE_CODE (type) == ARRAY_TYPE)
12495 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12497 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12500 else if (TREE_CODE (type) == COMPLEX_TYPE)
12503 if (TYPE_MODE (type) == DCmode && align < 64)
12505 if (TYPE_MODE (type) == XCmode && align < 128)
/* For records/unions, key off the mode of the first field.  */
12508 else if ((TREE_CODE (type) == RECORD_TYPE
12509 || TREE_CODE (type) == UNION_TYPE
12510 || TREE_CODE (type) == QUAL_UNION_TYPE)
12511 && TYPE_FIELDS (type))
12513 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12515 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12518 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12519 || TREE_CODE (type) == INTEGER_TYPE)
12521 if (TYPE_MODE (type) == DFmode && align < 64)
12523 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12530 /* Compute the alignment for a local variable.
12531 TYPE is the data type, and ALIGN is the alignment that
12532 the object would ordinarily have. The value of this macro is used
12533 instead of that alignment to align the object. */
/* Return the alignment (bits) for a local (stack) variable of TYPE whose
   natural alignment is ALIGN.  Mirrors ix86_data_alignment but with the
   smaller thresholds appropriate for stack objects.  */
12536 ix86_local_alignment (tree type, int align)
12538 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12539 to 16byte boundary. */
12542 if (AGGREGATE_TYPE_P (type)
12543 && TYPE_SIZE (type)
12544 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12545 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12546 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12549 if (TREE_CODE (type) == ARRAY_TYPE)
12551 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12553 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12556 else if (TREE_CODE (type) == COMPLEX_TYPE)
12558 if (TYPE_MODE (type) == DCmode && align < 64)
12560 if (TYPE_MODE (type) == XCmode && align < 128)
/* For records/unions, key off the mode of the first field.  */
12563 else if ((TREE_CODE (type) == RECORD_TYPE
12564 || TREE_CODE (type) == UNION_TYPE
12565 || TREE_CODE (type) == QUAL_UNION_TYPE)
12566 && TYPE_FIELDS (type))
12568 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12570 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12573 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12574 || TREE_CODE (type) == INTEGER_TYPE)
12577 if (TYPE_MODE (type) == DFmode && align < 64)
12579 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12585 /* Emit RTL insns to initialize the variable parts of a trampoline.
12586 FNADDR is an RTX for the address of the function's pure code.
12587 CXT is an RTX for the static chain value for the function. */
/* Emit RTL to fill in the variable parts of the trampoline at TRAMP:
   load the static chain CXT into the chain register and jump to FNADDR.
   32-bit emits movl $cxt,%ecx / jmp rel32; 64-bit builds a mov/movabs +
   indirect jump sequence through r11.  */
12589 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
12593 /* Compute offset from the end of the jmp to the target function. */
12594 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12595 plus_constant (tramp, 10),
12596 NULL_RTX, 1, OPTAB_DIRECT);
/* 0xb9 is the opcode of "movl $imm32, %ecx".  */
12597 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12598 gen_int_mode (0xb9, QImode));
12599 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
/* 0xe9 is the opcode of "jmp rel32".  */
12600 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12601 gen_int_mode (0xe9, QImode));
12602 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12607 /* Try to load address using shorter movl instead of movabs.
12608 We may want to support movq for kernel mode, but kernel does not use
12609 trampolines at the moment. */
12610 if (x86_64_zero_extended_value (fnaddr))
12612 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12613 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12614 gen_int_mode (0xbb41, HImode));
12615 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12616 gen_lowpart (SImode, fnaddr));
12621 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12622 gen_int_mode (0xbb49, HImode));
12623 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12627 /* Load static chain using movabs to r10. */
12628 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12629 gen_int_mode (0xba49, HImode));
12630 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12633 /* Jump to the r11 */
12634 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12635 gen_int_mode (0xff49, HImode));
12636 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12637 gen_int_mode (0xe3, QImode));
/* Sanity-check that we did not write past the reserved size.  */
12639 if (offset > TRAMPOLINE_SIZE)
12643 #ifdef TRANSFER_FROM_TRAMPOLINE
/* Some systems need an explicit call to make the stack executable.  */
12644 emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
12645 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
/* Register builtin NAME with type TYPE and code CODE, but only when the
   ISA bits in MASK are enabled (and any 64-bit-only builtin is skipped
   on 32-bit targets).  Comments cannot appear inside the continuation
   lines of the macro, hence this header only.  */
12649 #define def_builtin(MASK, NAME, TYPE, CODE) \
12651 if ((MASK) & target_flags \
12652 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
12653 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12654 NULL, NULL_TREE); \
/* Table entry describing one machine-specific builtin.  */
12657 struct builtin_description
/* ISA mask (MASK_SSE etc.) gating availability.  */
12659 const unsigned int mask;
/* Insn pattern used to expand the builtin.  */
12660 const enum insn_code icode;
/* Builtin's source-level name, e.g. "__builtin_ia32_addps".  */
12661 const char *const name;
/* Internal builtin code.  */
12662 const enum ix86_builtins code;
/* Comparison code for compare-style builtins; 0 otherwise.  */
12663 const enum rtx_code comparison;
/* Extra flag (e.g. operand swap for reversed comparisons).  */
12664 const unsigned int flag;
/* Descriptor table for the SSE/SSE2 comis/ucomis scalar-compare
   builtins; each entry maps a builtin name to its expander pattern and
   rtx comparison code.  */
12667 static const struct builtin_description bdesc_comi[] =
12669 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12670 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12671 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12672 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12673 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12674 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12675 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12676 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12677 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12678 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12679 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12680 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12681 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12682 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12683 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12684 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12685 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12686 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12687 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12688 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12689 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12690 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12691 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12692 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12695 static const struct builtin_description bdesc_2arg[] =
12698 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12699 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12700 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12701 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12702 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12703 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12704 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12705 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12707 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12708 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12709 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12710 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12711 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12712 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12713 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12714 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12715 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12716 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12717 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12718 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12719 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12720 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12721 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12722 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12723 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12724 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12725 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12726 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12728 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12729 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12730 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12731 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12733 { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12734 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12735 { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12736 { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12738 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12739 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12740 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12741 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12742 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12745 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12746 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12747 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12748 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
12749 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12750 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12751 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12752 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
12754 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12755 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12756 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12757 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12758 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12759 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12760 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12761 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12763 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12764 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12765 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12767 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12768 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12769 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12770 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12772 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12773 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12775 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12776 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12777 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12778 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12779 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12780 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12782 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12783 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12784 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12785 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12787 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12788 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12789 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12790 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12791 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12792 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12795 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12796 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12797 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12799 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12800 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12801 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
12803 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12804 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12805 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12806 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12807 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12808 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12810 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12811 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12812 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12813 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12814 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12815 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12817 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12818 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12819 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12820 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12822 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12823 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12826 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12827 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12828 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12829 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12830 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12831 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12832 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12833 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12835 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12836 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12837 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12838 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12839 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12840 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12841 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12842 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12843 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12844 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12845 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12846 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12847 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12848 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12849 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12850 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12851 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12852 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12853 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12854 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12856 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12857 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12858 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12859 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12861 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12862 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12863 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12864 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12866 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12867 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12868 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12871 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12872 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12873 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12874 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12875 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12876 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12877 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12878 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12880   /* These eight builtins are SSE2 128-bit saturating add/subtract
12880      (PADDSB/PADDSW/PSUBSB/PSUBSW/PADDUSB/PADDUSW/PSUBUSB/PSUBUSW on
12880      XMM registers); they must be gated on MASK_SSE2, not MASK_MMX,
12880      matching every other *128 entry in this table.  Using MASK_MMX
12880      made them available without -msse2 and lost with -mno-mmx.  */
12880   { MASK_SSE2, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12881   { MASK_SSE2, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12882   { MASK_SSE2, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12883   { MASK_SSE2, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12884   { MASK_SSE2, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12885   { MASK_SSE2, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12886   { MASK_SSE2, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12887   { MASK_SSE2, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12889 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12890 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12891 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
12892 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
12894 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12895 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12896 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12897 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
12899 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12900 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12902 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12903 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12904 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12905 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12906 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12907 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12909 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12910 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12911 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12912 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12914 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12915 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12916 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
12917 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
12918 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12919 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12920 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12921 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
12923 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12924 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12925 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12927 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12928 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12930 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12931 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12932 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12933 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12934 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12935 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12937 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12938 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12939 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12940 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12941 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12942 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12944 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12945 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12946 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12947 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12949 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12951 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12952 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
12953 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12954 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
12957 { MASK_PNI, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
12958 { MASK_PNI, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
12959 { MASK_PNI, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
12960 { MASK_PNI, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
12961 { MASK_PNI, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
12962 { MASK_PNI, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
12965 static const struct builtin_description bdesc_1arg[] =
12967 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12968 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12970 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12971 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12972 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12974 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12975 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12976 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
12977 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12978 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12979 { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
12981 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12982 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12983 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
12984 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
12986 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12988 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12989 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
12991 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12992 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12993 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12994 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12995 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
12997 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
12999 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
13000 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
13001 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
13002 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
13004 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
13005 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
13006 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
13008 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
13011 { MASK_PNI, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
13012 { MASK_PNI, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
13013 { MASK_PNI, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
13017 ix86_init_builtins (void)
13020 ix86_init_mmx_sse_builtins ();
13023 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
13024 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
13027 ix86_init_mmx_sse_builtins (void)
13029 const struct builtin_description * d;
13032 tree pchar_type_node = build_pointer_type (char_type_node);
13033 tree pcchar_type_node = build_pointer_type (
13034 build_type_variant (char_type_node, 1, 0));
13035 tree pfloat_type_node = build_pointer_type (float_type_node);
13036 tree pcfloat_type_node = build_pointer_type (
13037 build_type_variant (float_type_node, 1, 0));
13038 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
13039 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
13040 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
13043 tree int_ftype_v4sf_v4sf
13044 = build_function_type_list (integer_type_node,
13045 V4SF_type_node, V4SF_type_node, NULL_TREE);
13046 tree v4si_ftype_v4sf_v4sf
13047 = build_function_type_list (V4SI_type_node,
13048 V4SF_type_node, V4SF_type_node, NULL_TREE);
13049 /* MMX/SSE/integer conversions. */
13050 tree int_ftype_v4sf
13051 = build_function_type_list (integer_type_node,
13052 V4SF_type_node, NULL_TREE);
13053 tree int64_ftype_v4sf
13054 = build_function_type_list (long_long_integer_type_node,
13055 V4SF_type_node, NULL_TREE);
13056 tree int_ftype_v8qi
13057 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
13058 tree v4sf_ftype_v4sf_int
13059 = build_function_type_list (V4SF_type_node,
13060 V4SF_type_node, integer_type_node, NULL_TREE);
13061 tree v4sf_ftype_v4sf_int64
13062 = build_function_type_list (V4SF_type_node,
13063 V4SF_type_node, long_long_integer_type_node,
13065 tree v4sf_ftype_v4sf_v2si
13066 = build_function_type_list (V4SF_type_node,
13067 V4SF_type_node, V2SI_type_node, NULL_TREE);
13068 tree int_ftype_v4hi_int
13069 = build_function_type_list (integer_type_node,
13070 V4HI_type_node, integer_type_node, NULL_TREE);
13071 tree v4hi_ftype_v4hi_int_int
13072 = build_function_type_list (V4HI_type_node, V4HI_type_node,
13073 integer_type_node, integer_type_node,
13075 /* Miscellaneous. */
13076 tree v8qi_ftype_v4hi_v4hi
13077 = build_function_type_list (V8QI_type_node,
13078 V4HI_type_node, V4HI_type_node, NULL_TREE);
13079 tree v4hi_ftype_v2si_v2si
13080 = build_function_type_list (V4HI_type_node,
13081 V2SI_type_node, V2SI_type_node, NULL_TREE);
13082 tree v4sf_ftype_v4sf_v4sf_int
13083 = build_function_type_list (V4SF_type_node,
13084 V4SF_type_node, V4SF_type_node,
13085 integer_type_node, NULL_TREE);
13086 tree v2si_ftype_v4hi_v4hi
13087 = build_function_type_list (V2SI_type_node,
13088 V4HI_type_node, V4HI_type_node, NULL_TREE);
13089 tree v4hi_ftype_v4hi_int
13090 = build_function_type_list (V4HI_type_node,
13091 V4HI_type_node, integer_type_node, NULL_TREE);
13092 tree v4hi_ftype_v4hi_di
13093 = build_function_type_list (V4HI_type_node,
13094 V4HI_type_node, long_long_unsigned_type_node,
13096 tree v2si_ftype_v2si_di
13097 = build_function_type_list (V2SI_type_node,
13098 V2SI_type_node, long_long_unsigned_type_node,
13100 tree void_ftype_void
13101 = build_function_type (void_type_node, void_list_node);
13102 tree void_ftype_unsigned
13103 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
13104 tree void_ftype_unsigned_unsigned
13105 = build_function_type_list (void_type_node, unsigned_type_node,
13106 unsigned_type_node, NULL_TREE);
13107 tree void_ftype_pcvoid_unsigned_unsigned
13108 = build_function_type_list (void_type_node, const_ptr_type_node,
13109 unsigned_type_node, unsigned_type_node,
13111 tree unsigned_ftype_void
13112 = build_function_type (unsigned_type_node, void_list_node);
13114 = build_function_type (long_long_unsigned_type_node, void_list_node);
13115 tree v4sf_ftype_void
13116 = build_function_type (V4SF_type_node, void_list_node);
13117 tree v2si_ftype_v4sf
13118 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
13119 /* Loads/stores. */
13120 tree void_ftype_v8qi_v8qi_pchar
13121 = build_function_type_list (void_type_node,
13122 V8QI_type_node, V8QI_type_node,
13123 pchar_type_node, NULL_TREE);
13124 tree v4sf_ftype_pcfloat
13125 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
13126 /* @@@ the type is bogus */
13127 tree v4sf_ftype_v4sf_pv2si
13128 = build_function_type_list (V4SF_type_node,
13129 V4SF_type_node, pv2si_type_node, NULL_TREE);
13130 tree void_ftype_pv2si_v4sf
13131 = build_function_type_list (void_type_node,
13132 pv2si_type_node, V4SF_type_node, NULL_TREE);
13133 tree void_ftype_pfloat_v4sf
13134 = build_function_type_list (void_type_node,
13135 pfloat_type_node, V4SF_type_node, NULL_TREE);
13136 tree void_ftype_pdi_di
13137 = build_function_type_list (void_type_node,
13138 pdi_type_node, long_long_unsigned_type_node,
13140 tree void_ftype_pv2di_v2di
13141 = build_function_type_list (void_type_node,
13142 pv2di_type_node, V2DI_type_node, NULL_TREE);
13143 /* Normal vector unops. */
13144 tree v4sf_ftype_v4sf
13145 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
13147 /* Normal vector binops. */
13148 tree v4sf_ftype_v4sf_v4sf
13149 = build_function_type_list (V4SF_type_node,
13150 V4SF_type_node, V4SF_type_node, NULL_TREE);
13151 tree v8qi_ftype_v8qi_v8qi
13152 = build_function_type_list (V8QI_type_node,
13153 V8QI_type_node, V8QI_type_node, NULL_TREE);
13154 tree v4hi_ftype_v4hi_v4hi
13155 = build_function_type_list (V4HI_type_node,
13156 V4HI_type_node, V4HI_type_node, NULL_TREE);
13157 tree v2si_ftype_v2si_v2si
13158 = build_function_type_list (V2SI_type_node,
13159 V2SI_type_node, V2SI_type_node, NULL_TREE);
13160 tree di_ftype_di_di
13161 = build_function_type_list (long_long_unsigned_type_node,
13162 long_long_unsigned_type_node,
13163 long_long_unsigned_type_node, NULL_TREE);
13165 tree v2si_ftype_v2sf
13166 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
13167 tree v2sf_ftype_v2si
13168 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
13169 tree v2si_ftype_v2si
13170 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
13171 tree v2sf_ftype_v2sf
13172 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
13173 tree v2sf_ftype_v2sf_v2sf
13174 = build_function_type_list (V2SF_type_node,
13175 V2SF_type_node, V2SF_type_node, NULL_TREE);
13176 tree v2si_ftype_v2sf_v2sf
13177 = build_function_type_list (V2SI_type_node,
13178 V2SF_type_node, V2SF_type_node, NULL_TREE);
13179 tree pint_type_node = build_pointer_type (integer_type_node);
13180 tree pcint_type_node = build_pointer_type (
13181 build_type_variant (integer_type_node, 1, 0));
13182 tree pdouble_type_node = build_pointer_type (double_type_node);
13183 tree pcdouble_type_node = build_pointer_type (
13184 build_type_variant (double_type_node, 1, 0));
13185 tree int_ftype_v2df_v2df
13186 = build_function_type_list (integer_type_node,
13187 V2DF_type_node, V2DF_type_node, NULL_TREE);
13190 = build_function_type (intTI_type_node, void_list_node);
13191 tree v2di_ftype_void
13192 = build_function_type (V2DI_type_node, void_list_node);
13193 tree ti_ftype_ti_ti
13194 = build_function_type_list (intTI_type_node,
13195 intTI_type_node, intTI_type_node, NULL_TREE);
13196 tree void_ftype_pcvoid
13197 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
13199 = build_function_type_list (V2DI_type_node,
13200 long_long_unsigned_type_node, NULL_TREE);
13202 = build_function_type_list (long_long_unsigned_type_node,
13203 V2DI_type_node, NULL_TREE);
13204 tree v4sf_ftype_v4si
13205 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
13206 tree v4si_ftype_v4sf
13207 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
13208 tree v2df_ftype_v4si
13209 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
13210 tree v4si_ftype_v2df
13211 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
13212 tree v2si_ftype_v2df
13213 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
13214 tree v4sf_ftype_v2df
13215 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
13216 tree v2df_ftype_v2si
13217 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
13218 tree v2df_ftype_v4sf
13219 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
13220 tree int_ftype_v2df
13221 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
13222 tree int64_ftype_v2df
13223 = build_function_type_list (long_long_integer_type_node,
13224 V2DF_type_node, NULL_TREE);
13225 tree v2df_ftype_v2df_int
13226 = build_function_type_list (V2DF_type_node,
13227 V2DF_type_node, integer_type_node, NULL_TREE);
13228 tree v2df_ftype_v2df_int64
13229 = build_function_type_list (V2DF_type_node,
13230 V2DF_type_node, long_long_integer_type_node,
13232 tree v4sf_ftype_v4sf_v2df
13233 = build_function_type_list (V4SF_type_node,
13234 V4SF_type_node, V2DF_type_node, NULL_TREE);
13235 tree v2df_ftype_v2df_v4sf
13236 = build_function_type_list (V2DF_type_node,
13237 V2DF_type_node, V4SF_type_node, NULL_TREE);
13238 tree v2df_ftype_v2df_v2df_int
13239 = build_function_type_list (V2DF_type_node,
13240 V2DF_type_node, V2DF_type_node,
13243 tree v2df_ftype_v2df_pv2si
13244 = build_function_type_list (V2DF_type_node,
13245 V2DF_type_node, pv2si_type_node, NULL_TREE);
13246 tree void_ftype_pv2si_v2df
13247 = build_function_type_list (void_type_node,
13248 pv2si_type_node, V2DF_type_node, NULL_TREE);
13249 tree void_ftype_pdouble_v2df
13250 = build_function_type_list (void_type_node,
13251 pdouble_type_node, V2DF_type_node, NULL_TREE);
13252 tree void_ftype_pint_int
13253 = build_function_type_list (void_type_node,
13254 pint_type_node, integer_type_node, NULL_TREE);
13255 tree void_ftype_v16qi_v16qi_pchar
13256 = build_function_type_list (void_type_node,
13257 V16QI_type_node, V16QI_type_node,
13258 pchar_type_node, NULL_TREE);
13259 tree v2df_ftype_pcdouble
13260 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
13261 tree v2df_ftype_v2df_v2df
13262 = build_function_type_list (V2DF_type_node,
13263 V2DF_type_node, V2DF_type_node, NULL_TREE);
13264 tree v16qi_ftype_v16qi_v16qi
13265 = build_function_type_list (V16QI_type_node,
13266 V16QI_type_node, V16QI_type_node, NULL_TREE);
13267 tree v8hi_ftype_v8hi_v8hi
13268 = build_function_type_list (V8HI_type_node,
13269 V8HI_type_node, V8HI_type_node, NULL_TREE);
13270 tree v4si_ftype_v4si_v4si
13271 = build_function_type_list (V4SI_type_node,
13272 V4SI_type_node, V4SI_type_node, NULL_TREE);
13273 tree v2di_ftype_v2di_v2di
13274 = build_function_type_list (V2DI_type_node,
13275 V2DI_type_node, V2DI_type_node, NULL_TREE);
13276 tree v2di_ftype_v2df_v2df
13277 = build_function_type_list (V2DI_type_node,
13278 V2DF_type_node, V2DF_type_node, NULL_TREE);
13279 tree v2df_ftype_v2df
13280 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13281 tree v2df_ftype_double
13282 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
13283 tree v2df_ftype_double_double
13284 = build_function_type_list (V2DF_type_node,
13285 double_type_node, double_type_node, NULL_TREE);
13286 tree int_ftype_v8hi_int
13287 = build_function_type_list (integer_type_node,
13288 V8HI_type_node, integer_type_node, NULL_TREE);
13289 tree v8hi_ftype_v8hi_int_int
13290 = build_function_type_list (V8HI_type_node,
13291 V8HI_type_node, integer_type_node,
13292 integer_type_node, NULL_TREE);
13293 tree v2di_ftype_v2di_int
13294 = build_function_type_list (V2DI_type_node,
13295 V2DI_type_node, integer_type_node, NULL_TREE);
13296 tree v4si_ftype_v4si_int
13297 = build_function_type_list (V4SI_type_node,
13298 V4SI_type_node, integer_type_node, NULL_TREE);
13299 tree v8hi_ftype_v8hi_int
13300 = build_function_type_list (V8HI_type_node,
13301 V8HI_type_node, integer_type_node, NULL_TREE);
13302 tree v8hi_ftype_v8hi_v2di
13303 = build_function_type_list (V8HI_type_node,
13304 V8HI_type_node, V2DI_type_node, NULL_TREE);
13305 tree v4si_ftype_v4si_v2di
13306 = build_function_type_list (V4SI_type_node,
13307 V4SI_type_node, V2DI_type_node, NULL_TREE);
13308 tree v4si_ftype_v8hi_v8hi
13309 = build_function_type_list (V4SI_type_node,
13310 V8HI_type_node, V8HI_type_node, NULL_TREE);
13311 tree di_ftype_v8qi_v8qi
13312 = build_function_type_list (long_long_unsigned_type_node,
13313 V8QI_type_node, V8QI_type_node, NULL_TREE);
13314 tree v2di_ftype_v16qi_v16qi
13315 = build_function_type_list (V2DI_type_node,
13316 V16QI_type_node, V16QI_type_node, NULL_TREE);
13317 tree int_ftype_v16qi
13318 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
13319 tree v16qi_ftype_pcchar
13320 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
13321 tree void_ftype_pchar_v16qi
13322 = build_function_type_list (void_type_node,
13323 pchar_type_node, V16QI_type_node, NULL_TREE);
13324 tree v4si_ftype_pcint
13325 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
13326 tree void_ftype_pcint_v4si
13327 = build_function_type_list (void_type_node,
13328 pcint_type_node, V4SI_type_node, NULL_TREE);
13329 tree v2di_ftype_v2di
13330 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
13333 tree float128_type;
13335 /* The __float80 type. */
13336 if (TYPE_MODE (long_double_type_node) == XFmode)
13337 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
13341 /* The __float80 type. */
13342 float80_type = make_node (REAL_TYPE);
13343 TYPE_PRECISION (float80_type) = 96;
13344 layout_type (float80_type);
13345 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
13348 float128_type = make_node (REAL_TYPE);
13349 TYPE_PRECISION (float128_type) = 128;
13350 layout_type (float128_type);
13351 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
13353 /* Add all builtins that are more or less simple operations on two
13355 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13357 /* Use one of the operands; the target can have a different mode for
13358 mask-generating compares. */
13359 enum machine_mode mode;
13364 mode = insn_data[d->icode].operand[1].mode;
13369 type = v16qi_ftype_v16qi_v16qi;
13372 type = v8hi_ftype_v8hi_v8hi;
13375 type = v4si_ftype_v4si_v4si;
13378 type = v2di_ftype_v2di_v2di;
13381 type = v2df_ftype_v2df_v2df;
13384 type = ti_ftype_ti_ti;
13387 type = v4sf_ftype_v4sf_v4sf;
13390 type = v8qi_ftype_v8qi_v8qi;
13393 type = v4hi_ftype_v4hi_v4hi;
13396 type = v2si_ftype_v2si_v2si;
13399 type = di_ftype_di_di;
13406 /* Override for comparisons. */
13407 if (d->icode == CODE_FOR_maskcmpv4sf3
13408 || d->icode == CODE_FOR_maskncmpv4sf3
13409 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13410 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
13411 type = v4si_ftype_v4sf_v4sf;
13413 if (d->icode == CODE_FOR_maskcmpv2df3
13414 || d->icode == CODE_FOR_maskncmpv2df3
13415 || d->icode == CODE_FOR_vmmaskcmpv2df3
13416 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13417 type = v2di_ftype_v2df_v2df;
13419 def_builtin (d->mask, d->name, type, d->code);
13422 /* Add the remaining MMX insns with somewhat more complicated types. */
13423 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13424 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13425 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13426 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13427 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13429 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13430 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13431 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13433 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13434 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13436 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13437 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
13439 /* comi/ucomi insns. */
13440 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13441 if (d->mask == MASK_SSE2)
13442 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13444 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
13446 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13447 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13448 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13450 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13451 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13452 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13453 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13454 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13455 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
13456 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13457 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
13458 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13459 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13460 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
13462 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13463 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13465 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13467 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13468 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13469 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13470 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13471 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13472 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13474 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13475 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13476 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13477 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13479 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13480 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13481 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13482 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13484 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13486 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13488 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13489 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13490 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13491 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13492 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13493 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13495 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13497 /* Original 3DNow! */
13498 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13499 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13500 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13501 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13502 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13503 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13504 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13505 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13506 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13507 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13508 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13509 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13510 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13511 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13512 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13513 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13514 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13515 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13516 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13517 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13519 /* 3DNow! extension as used in the Athlon CPU. */
13520 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13521 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13522 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13523 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13524 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13525 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13527 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13530 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13531 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13533 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13534 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13535 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13537 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13538 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13539 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
13540 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13541 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13542 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13544 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13545 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13546 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13547 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13549 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13550 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13551 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13552 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13553 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13555 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13556 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13557 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13558 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13560 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13561 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13563 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13565 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13566 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13568 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13569 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13570 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13571 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13572 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13574 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13576 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13577 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13578 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13579 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
13581 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13582 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13583 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13585 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13586 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
13587 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13588 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13590 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13591 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13592 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13593 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13594 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13595 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13596 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13598 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13599 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13600 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13602 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13603 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13604 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13605 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13606 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13607 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13608 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13610 def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13612 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13613 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13614 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13616 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13617 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13618 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13620 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13621 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13623 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13624 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13625 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13626 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13628 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13629 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13630 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13631 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13633 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13634 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13636 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13638 /* Prescott New Instructions. */
13639 def_builtin (MASK_PNI, "__builtin_ia32_monitor",
13640 void_ftype_pcvoid_unsigned_unsigned,
13641 IX86_BUILTIN_MONITOR);
13642 def_builtin (MASK_PNI, "__builtin_ia32_mwait",
13643 void_ftype_unsigned_unsigned,
13644 IX86_BUILTIN_MWAIT);
13645 def_builtin (MASK_PNI, "__builtin_ia32_movshdup",
13647 IX86_BUILTIN_MOVSHDUP);
13648 def_builtin (MASK_PNI, "__builtin_ia32_movsldup",
13650 IX86_BUILTIN_MOVSLDUP);
13651 def_builtin (MASK_PNI, "__builtin_ia32_lddqu",
13652 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
13653 def_builtin (MASK_PNI, "__builtin_ia32_loadddup",
13654 v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
13655 def_builtin (MASK_PNI, "__builtin_ia32_movddup",
13656 v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
13659 /* Errors in the source file can cause expand_expr to return const0_rtx
13660 where we expect a vector. To avoid crashing, use one of the vector
13661 clear instructions. */
/* NOTE(review): this chunk appears to have lines missing (the return-type
   line, braces, and presumably an early "return x" for the non-const0_rtx
   case are not visible) -- the comments below describe only what is shown.
   Returns X unchanged when it is already a vector value; otherwise
   substitutes a freshly cleared pseudo-register of MODE.  */
13663 safe_vector_operand (rtx x, enum machine_mode mode)
/* Only the literal const0_rtx needs rescue; anything else is usable.  */
13665 if (x != const0_rtx)
/* Materialize a new pseudo of the requested vector MODE...  */
13667 x = gen_reg_rtx (mode);
/* ...and clear it: MMX/3DNow! modes go through the 64-bit mmx_clrdi
   pattern, viewing X as DImode via a SUBREG when necessary...  */
13669 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13670 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13671 : gen_rtx_SUBREG (DImode, x, 0)));
/* ...while every other (SSE) mode is cleared through sse_clrv4sf on a
   V4SFmode view of X.  */
13673 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13674 : gen_rtx_SUBREG (V4SFmode, x, 0),
13675 CONST0_RTX (V4SFmode)));
13679 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* NOTE(review): several lines of this function are missing from this
   chunk (the "static rtx" line, braces, the pattern-emit tail and
   "return target").  Expands a two-operand builtin described by ICODE,
   placing the result in TARGET when TARGET is suitable.  */
13682 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
/* Pull the two arguments off the front of ARGLIST and expand them.  */
13685 tree arg0 = TREE_VALUE (arglist);
13686 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13687 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13688 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
/* Result and input modes come from the insn's operand table.  */
13689 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13690 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13691 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Rescue const0_rtx operands that erroneous source can produce.  */
13693 if (VECTOR_MODE_P (mode0))
13694 op0 = safe_vector_operand (op0, mode0);
13695 if (VECTOR_MODE_P (mode1))
13696 op1 = safe_vector_operand (op1, mode1);
/* Use TARGET only if it has the right mode and passes the insn's
   destination predicate; otherwise take a new pseudo.  (The leading
   "if (target == 0" line appears to be missing here.)  */
13699 || GET_MODE (target) != tmode
13700 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13701 target = gen_reg_rtx (tmode);
/* Special case: an SImode second operand destined for a TImode insn
   operand is widened by loading it into a V4SImode register via
   sse2_loadd and then viewing the low part as TImode.  */
13703 if (GET_MODE (op1) == SImode && mode1 == TImode)
13705 rtx x = gen_reg_rtx (V4SImode);
13706 emit_insn (gen_sse2_loadd (x, op1));
13707 op1 = gen_lowpart (TImode, x);
13710 /* In case the insn wants input operands in modes different from
13711 the result, abort. */
13712 if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
13713 || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
/* Force each operand into a register when it fails the insn's
   operand predicate.  */
13716 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13717 op0 = copy_to_mode_reg (mode0, op0)
13718 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13719 op1 = copy_to_mode_reg (mode1, op1);
13721 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13722 yet one of the two must not be a memory. This is normally enforced
13723 by expanders, but we didn't bother to create one here. */
13724 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13725 op0 = copy_to_mode_reg (mode0, op0);
/* Generate the insn pattern; the emit/return tail is not visible in
   this chunk.  */
13727 pat = GEN_FCN (icode) (target, op0, op1);
13734 /* Subroutine of ix86_expand_builtin to take care of stores. */
/* NOTE(review): the function's type line, braces and emit/return tail
   are missing from this chunk.  Expands a store builtin: arg0 is the
   destination address, arg1 the value to store.  */
13737 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
13740 tree arg0 = TREE_VALUE (arglist);
13741 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13742 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13743 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
/* Operand 0 is the memory destination, operand 1 the source value.  */
13744 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13745 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
/* Rescue a const0_rtx source produced by erroneous input.  */
13747 if (VECTOR_MODE_P (mode1))
13748 op1 = safe_vector_operand (op1, mode1);
/* Turn the expanded address into a MEM of the store mode, forcing the
   address into a Pmode register first.  */
13750 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
/* The value operand is unconditionally forced into a register.  */
13751 op1 = copy_to_mode_reg (mode1, op1);
/* Generate the store pattern; emission is not visible in this chunk.  */
13753 pat = GEN_FCN (icode) (op0, op1);
13759 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
/* NOTE(review): lines are missing from this chunk -- in particular the
   conditional (presumably "if (do_load)") that guards wrapping OP0 in
   a MEM, plus braces and the emit/return tail.  Expands a one-operand
   builtin; when DO_LOAD is nonzero the argument is treated as an
   address to load from.  */
13762 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
13763 rtx target, int do_load)
13766 tree arg0 = TREE_VALUE (arglist);
13767 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13768 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13769 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
/* Use TARGET only when mode and destination predicate allow it.  */
13772 || GET_MODE (target) != tmode
13773 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13774 target = gen_reg_rtx (tmode);
/* Load case: OP0 is an address -- wrap it in a MEM of MODE0.  */
13776 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
/* Non-load case: rescue const0_rtx vectors, then satisfy the insn's
   operand predicate.  */
13779 if (VECTOR_MODE_P (mode0))
13780 op0 = safe_vector_operand (op0, mode0);
13782 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13783 op0 = copy_to_mode_reg (mode0, op0);
/* Generate the pattern; emission/return not visible in this chunk.  */
13786 pat = GEN_FCN (icode) (target, op0);
13793 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13794 sqrtss, rsqrtss, rcpss. */
/* NOTE(review): the line initializing OP1 (presumably a copy of OP0,
   since these scalar insns take the pass-through vector as a second
   operand) is missing from this chunk, as are braces and the
   emit/return tail.  */
13797 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
13800 tree arg0 = TREE_VALUE (arglist);
13801 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13802 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13803 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
/* Use TARGET only when mode and destination predicate allow it.  */
13806 || GET_MODE (target) != tmode
13807 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13808 target = gen_reg_rtx (tmode);
/* Rescue a const0_rtx operand produced by erroneous input.  */
13810 if (VECTOR_MODE_P (mode0))
13811 op0 = safe_vector_operand (op0, mode0);
13812 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13814 op0 = copy_to_mode_reg (mode0, op0);
/* Note: OP1 is checked against MODE0 -- both input operands of these
   scalar insns share the same vector mode.  */
13817 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13818 op1 = copy_to_mode_reg (mode0, op1);
/* Generate the pattern; emission/return not visible in this chunk.  */
13820 pat = GEN_FCN (icode) (target, op0, op1);
13827 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* NOTE(review): lines are missing here -- the remaining parameters of
   the signature, local declarations for PAT/OP2, most of the
   operand-swap branch, and the emit/return tail are not visible.
   Expands an SSE/SSE2 mask-generating compare described by D; the
   comparison code comes from d->comparison.  */
13830 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
13834 tree arg0 = TREE_VALUE (arglist);
13835 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13836 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13837 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13839 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13840 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13841 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13842 enum rtx_code comparison = d->comparison;
/* Rescue const0_rtx operands produced by erroneous input.  */
13844 if (VECTOR_MODE_P (mode0))
13845 op0 = safe_vector_operand (op0, mode0);
13846 if (VECTOR_MODE_P (mode1))
13847 op1 = safe_vector_operand (op1, mode1);
13849 /* Swap operands if we have a comparison that isn't available in
/* (swap branch partially missing from this chunk)  */
13853 rtx tmp = gen_reg_rtx (mode1);
13854 emit_move_insn (tmp, op1);
/* Use TARGET only when mode and destination predicate allow it.  */
13860 || GET_MODE (target) != tmode
13861 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13862 target = gen_reg_rtx (tmode);
13864 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13865 op0 = copy_to_mode_reg (mode0, op0);
13866 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13867 op1 = copy_to_mode_reg (mode1, op1);
/* OP2 is the comparison rtx itself, passed as the insn's condition
   operand; emission/return not visible in this chunk.  */
13869 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13870 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13877 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* NOTE(review): lines are missing here -- the remaining signature
   parameters, local declarations, the body of the operand-swap branch,
   the pattern-failure check, and the final brace are not visible.
   Expands a comi/ucomi builtin: the compare insn sets the flags, and
   the boolean result is materialized into the low byte of an SImode
   pseudo, which is returned.  */
13880 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
13884 tree arg0 = TREE_VALUE (arglist);
13885 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13886 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13887 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13889 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13890 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13891 enum rtx_code comparison = d->comparison;
/* Rescue const0_rtx operands produced by erroneous input.  */
13893 if (VECTOR_MODE_P (mode0))
13894 op0 = safe_vector_operand (op0, mode0);
13895 if (VECTOR_MODE_P (mode1))
13896 op1 = safe_vector_operand (op1, mode1);
13898 /* Swap operands if we have a comparison that isn't available in
/* Build the result holder: an SImode pseudo zeroed up front, then
   viewed as QImode so only the low byte is written below.  */
13907 target = gen_reg_rtx (SImode);
13908 emit_move_insn (target, const0_rtx);
13909 target = gen_rtx_SUBREG (QImode, target, 0);
13911 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13912 op0 = copy_to_mode_reg (mode0, op0);
13913 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13914 op1 = copy_to_mode_reg (mode1, op1);
13916 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
/* The comi insn takes only the two inputs; its result is the flags.  */
13917 pat = GEN_FCN (d->icode) (op0, op1);
/* Store the flag condition into the low byte of TARGET without
   disturbing the zeroed upper bytes (STRICT_LOW_PART).  */
13921 emit_insn (gen_rtx_SET (VOIDmode,
13922 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13923 gen_rtx_fmt_ee (comparison, QImode,
/* Return the full SImode pseudo underlying the QImode subreg.  */
13927 return SUBREG_REG (target);
13930 /* Expand an expression EXP that calls a built-in function,
13931 with result going to TARGET if that's convenient
13932 (and in mode MODE if that's convenient).
13933 SUBTARGET may be used as the target for computing one of EXP's operands.
13934 IGNORE is nonzero if the value is to be ignored. */
13937 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
13938 enum machine_mode mode ATTRIBUTE_UNUSED,
13939 int ignore ATTRIBUTE_UNUSED)
13941 const struct builtin_description *d;
13943 enum insn_code icode;
13944 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13945 tree arglist = TREE_OPERAND (exp, 1);
13946 tree arg0, arg1, arg2;
13947 rtx op0, op1, op2, pat;
13948 enum machine_mode tmode, mode0, mode1, mode2;
13949 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13953 case IX86_BUILTIN_EMMS:
13954 emit_insn (gen_emms ());
13957 case IX86_BUILTIN_SFENCE:
13958 emit_insn (gen_sfence ());
13961 case IX86_BUILTIN_PEXTRW:
13962 case IX86_BUILTIN_PEXTRW128:
13963 icode = (fcode == IX86_BUILTIN_PEXTRW
13964 ? CODE_FOR_mmx_pextrw
13965 : CODE_FOR_sse2_pextrw);
13966 arg0 = TREE_VALUE (arglist);
13967 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13968 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13969 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13970 tmode = insn_data[icode].operand[0].mode;
13971 mode0 = insn_data[icode].operand[1].mode;
13972 mode1 = insn_data[icode].operand[2].mode;
13974 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13975 op0 = copy_to_mode_reg (mode0, op0);
13976 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13978 error ("selector must be an integer constant in the range 0..%i",
13979 fcode == IX86_BUILTIN_PEXTRW ? 3:7);
13980 return gen_reg_rtx (tmode);
13983 || GET_MODE (target) != tmode
13984 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13985 target = gen_reg_rtx (tmode);
13986 pat = GEN_FCN (icode) (target, op0, op1);
13992 case IX86_BUILTIN_PINSRW:
13993 case IX86_BUILTIN_PINSRW128:
13994 icode = (fcode == IX86_BUILTIN_PINSRW
13995 ? CODE_FOR_mmx_pinsrw
13996 : CODE_FOR_sse2_pinsrw);
13997 arg0 = TREE_VALUE (arglist);
13998 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13999 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14000 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14001 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14002 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14003 tmode = insn_data[icode].operand[0].mode;
14004 mode0 = insn_data[icode].operand[1].mode;
14005 mode1 = insn_data[icode].operand[2].mode;
14006 mode2 = insn_data[icode].operand[3].mode;
14008 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14009 op0 = copy_to_mode_reg (mode0, op0);
14010 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14011 op1 = copy_to_mode_reg (mode1, op1);
14012 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14014 error ("selector must be an integer constant in the range 0..%i",
14015 fcode == IX86_BUILTIN_PINSRW ? 15:255);
14019 || GET_MODE (target) != tmode
14020 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14021 target = gen_reg_rtx (tmode);
14022 pat = GEN_FCN (icode) (target, op0, op1, op2);
14028 case IX86_BUILTIN_MASKMOVQ:
14029 case IX86_BUILTIN_MASKMOVDQU:
14030 icode = (fcode == IX86_BUILTIN_MASKMOVQ
14031 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
14032 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
14033 : CODE_FOR_sse2_maskmovdqu));
14034 /* Note the arg order is different from the operand order. */
14035 arg1 = TREE_VALUE (arglist);
14036 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
14037 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14038 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14039 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14040 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14041 mode0 = insn_data[icode].operand[0].mode;
14042 mode1 = insn_data[icode].operand[1].mode;
14043 mode2 = insn_data[icode].operand[2].mode;
14045 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14046 op0 = copy_to_mode_reg (mode0, op0);
14047 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14048 op1 = copy_to_mode_reg (mode1, op1);
14049 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
14050 op2 = copy_to_mode_reg (mode2, op2);
14051 pat = GEN_FCN (icode) (op0, op1, op2);
14057 case IX86_BUILTIN_SQRTSS:
14058 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
14059 case IX86_BUILTIN_RSQRTSS:
14060 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
14061 case IX86_BUILTIN_RCPSS:
14062 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
14064 case IX86_BUILTIN_LOADAPS:
14065 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
14067 case IX86_BUILTIN_LOADUPS:
14068 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
14070 case IX86_BUILTIN_STOREAPS:
14071 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
14073 case IX86_BUILTIN_STOREUPS:
14074 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
14076 case IX86_BUILTIN_LOADSS:
14077 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
14079 case IX86_BUILTIN_STORESS:
14080 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
14082 case IX86_BUILTIN_LOADHPS:
14083 case IX86_BUILTIN_LOADLPS:
14084 case IX86_BUILTIN_LOADHPD:
14085 case IX86_BUILTIN_LOADLPD:
14086 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
14087 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
14088 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
14089 : CODE_FOR_sse2_movlpd);
14090 arg0 = TREE_VALUE (arglist);
14091 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14092 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14093 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14094 tmode = insn_data[icode].operand[0].mode;
14095 mode0 = insn_data[icode].operand[1].mode;
14096 mode1 = insn_data[icode].operand[2].mode;
14098 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14099 op0 = copy_to_mode_reg (mode0, op0);
14100 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
14102 || GET_MODE (target) != tmode
14103 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14104 target = gen_reg_rtx (tmode);
14105 pat = GEN_FCN (icode) (target, op0, op1);
14111 case IX86_BUILTIN_STOREHPS:
14112 case IX86_BUILTIN_STORELPS:
14113 case IX86_BUILTIN_STOREHPD:
14114 case IX86_BUILTIN_STORELPD:
14115 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
14116 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
14117 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
14118 : CODE_FOR_sse2_movlpd);
14119 arg0 = TREE_VALUE (arglist);
14120 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14121 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14122 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14123 mode0 = insn_data[icode].operand[1].mode;
14124 mode1 = insn_data[icode].operand[2].mode;
14126 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14127 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14128 op1 = copy_to_mode_reg (mode1, op1);
14130 pat = GEN_FCN (icode) (op0, op0, op1);
14136 case IX86_BUILTIN_MOVNTPS:
14137 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
14138 case IX86_BUILTIN_MOVNTQ:
14139 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
14141 case IX86_BUILTIN_LDMXCSR:
14142 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14143 target = assign_386_stack_local (SImode, 0);
14144 emit_move_insn (target, op0);
14145 emit_insn (gen_ldmxcsr (target));
14148 case IX86_BUILTIN_STMXCSR:
14149 target = assign_386_stack_local (SImode, 0);
14150 emit_insn (gen_stmxcsr (target));
14151 return copy_to_mode_reg (SImode, target);
14153 case IX86_BUILTIN_SHUFPS:
14154 case IX86_BUILTIN_SHUFPD:
14155 icode = (fcode == IX86_BUILTIN_SHUFPS
14156 ? CODE_FOR_sse_shufps
14157 : CODE_FOR_sse2_shufpd);
14158 arg0 = TREE_VALUE (arglist);
14159 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14160 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14161 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14162 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14163 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14164 tmode = insn_data[icode].operand[0].mode;
14165 mode0 = insn_data[icode].operand[1].mode;
14166 mode1 = insn_data[icode].operand[2].mode;
14167 mode2 = insn_data[icode].operand[3].mode;
14169 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14170 op0 = copy_to_mode_reg (mode0, op0);
14171 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14172 op1 = copy_to_mode_reg (mode1, op1);
14173 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14175 /* @@@ better error message */
14176 error ("mask must be an immediate");
14177 return gen_reg_rtx (tmode);
14180 || GET_MODE (target) != tmode
14181 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14182 target = gen_reg_rtx (tmode);
14183 pat = GEN_FCN (icode) (target, op0, op1, op2);
14189 case IX86_BUILTIN_PSHUFW:
14190 case IX86_BUILTIN_PSHUFD:
14191 case IX86_BUILTIN_PSHUFHW:
14192 case IX86_BUILTIN_PSHUFLW:
14193 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14194 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14195 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14196 : CODE_FOR_mmx_pshufw);
14197 arg0 = TREE_VALUE (arglist);
14198 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14199 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14200 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14201 tmode = insn_data[icode].operand[0].mode;
14202 mode1 = insn_data[icode].operand[1].mode;
14203 mode2 = insn_data[icode].operand[2].mode;
14205 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14206 op0 = copy_to_mode_reg (mode1, op0);
14207 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14209 /* @@@ better error message */
14210 error ("mask must be an immediate");
14214 || GET_MODE (target) != tmode
14215 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14216 target = gen_reg_rtx (tmode);
14217 pat = GEN_FCN (icode) (target, op0, op1);
14223 case IX86_BUILTIN_PSLLDQI128:
14224 case IX86_BUILTIN_PSRLDQI128:
14225 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14226 : CODE_FOR_sse2_lshrti3);
14227 arg0 = TREE_VALUE (arglist);
14228 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14229 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14230 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14231 tmode = insn_data[icode].operand[0].mode;
14232 mode1 = insn_data[icode].operand[1].mode;
14233 mode2 = insn_data[icode].operand[2].mode;
14235 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14237 op0 = copy_to_reg (op0);
14238 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14240 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14242 error ("shift must be an immediate");
14245 target = gen_reg_rtx (V2DImode);
14246 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14252 case IX86_BUILTIN_FEMMS:
14253 emit_insn (gen_femms ());
14256 case IX86_BUILTIN_PAVGUSB:
14257 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
14259 case IX86_BUILTIN_PF2ID:
14260 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
14262 case IX86_BUILTIN_PFACC:
14263 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
14265 case IX86_BUILTIN_PFADD:
14266 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
14268 case IX86_BUILTIN_PFCMPEQ:
14269 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
14271 case IX86_BUILTIN_PFCMPGE:
14272 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
14274 case IX86_BUILTIN_PFCMPGT:
14275 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
14277 case IX86_BUILTIN_PFMAX:
14278 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
14280 case IX86_BUILTIN_PFMIN:
14281 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
14283 case IX86_BUILTIN_PFMUL:
14284 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
14286 case IX86_BUILTIN_PFRCP:
14287 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
14289 case IX86_BUILTIN_PFRCPIT1:
14290 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
14292 case IX86_BUILTIN_PFRCPIT2:
14293 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
14295 case IX86_BUILTIN_PFRSQIT1:
14296 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
14298 case IX86_BUILTIN_PFRSQRT:
14299 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
14301 case IX86_BUILTIN_PFSUB:
14302 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
14304 case IX86_BUILTIN_PFSUBR:
14305 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
14307 case IX86_BUILTIN_PI2FD:
14308 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
14310 case IX86_BUILTIN_PMULHRW:
14311 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
14313 case IX86_BUILTIN_PF2IW:
14314 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
14316 case IX86_BUILTIN_PFNACC:
14317 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
14319 case IX86_BUILTIN_PFPNACC:
14320 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
14322 case IX86_BUILTIN_PI2FW:
14323 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
14325 case IX86_BUILTIN_PSWAPDSI:
14326 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
14328 case IX86_BUILTIN_PSWAPDSF:
14329 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
14331 case IX86_BUILTIN_SSE_ZERO:
14332 target = gen_reg_rtx (V4SFmode);
14333 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
14336 case IX86_BUILTIN_MMX_ZERO:
14337 target = gen_reg_rtx (DImode);
14338 emit_insn (gen_mmx_clrdi (target));
14341 case IX86_BUILTIN_CLRTI:
14342 target = gen_reg_rtx (V2DImode);
14343 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
14347 case IX86_BUILTIN_SQRTSD:
14348 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14349 case IX86_BUILTIN_LOADAPD:
14350 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14351 case IX86_BUILTIN_LOADUPD:
14352 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14354 case IX86_BUILTIN_STOREAPD:
14355 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14356 case IX86_BUILTIN_STOREUPD:
14357 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14359 case IX86_BUILTIN_LOADSD:
14360 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14362 case IX86_BUILTIN_STORESD:
14363 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
14365 case IX86_BUILTIN_SETPD1:
14366 target = assign_386_stack_local (DFmode, 0);
14367 arg0 = TREE_VALUE (arglist);
14368 emit_move_insn (adjust_address (target, DFmode, 0),
14369 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14370 op0 = gen_reg_rtx (V2DFmode);
14371 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
14372 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
14375 case IX86_BUILTIN_SETPD:
14376 target = assign_386_stack_local (V2DFmode, 0);
14377 arg0 = TREE_VALUE (arglist);
14378 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14379 emit_move_insn (adjust_address (target, DFmode, 0),
14380 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14381 emit_move_insn (adjust_address (target, DFmode, 8),
14382 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14383 op0 = gen_reg_rtx (V2DFmode);
14384 emit_insn (gen_sse2_movapd (op0, target));
14387 case IX86_BUILTIN_LOADRPD:
14388 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14389 gen_reg_rtx (V2DFmode), 1);
14390 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
14393 case IX86_BUILTIN_LOADPD1:
14394 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14395 gen_reg_rtx (V2DFmode), 1);
14396 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14399 case IX86_BUILTIN_STOREPD1:
14400 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14401 case IX86_BUILTIN_STORERPD:
14402 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14404 case IX86_BUILTIN_CLRPD:
14405 target = gen_reg_rtx (V2DFmode);
14406 emit_insn (gen_sse_clrv2df (target));
14409 case IX86_BUILTIN_MFENCE:
14410 emit_insn (gen_sse2_mfence ());
14412 case IX86_BUILTIN_LFENCE:
14413 emit_insn (gen_sse2_lfence ());
14416 case IX86_BUILTIN_CLFLUSH:
14417 arg0 = TREE_VALUE (arglist);
14418 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14419 icode = CODE_FOR_sse2_clflush;
14420 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14421 op0 = copy_to_mode_reg (Pmode, op0);
14423 emit_insn (gen_sse2_clflush (op0));
14426 case IX86_BUILTIN_MOVNTPD:
14427 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14428 case IX86_BUILTIN_MOVNTDQ:
14429 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14430 case IX86_BUILTIN_MOVNTI:
14431 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14433 case IX86_BUILTIN_LOADDQA:
14434 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14435 case IX86_BUILTIN_LOADDQU:
14436 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14437 case IX86_BUILTIN_LOADD:
14438 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14440 case IX86_BUILTIN_STOREDQA:
14441 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14442 case IX86_BUILTIN_STOREDQU:
14443 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14444 case IX86_BUILTIN_STORED:
14445 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
14447 case IX86_BUILTIN_MONITOR:
14448 arg0 = TREE_VALUE (arglist);
14449 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14450 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14451 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14452 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14453 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14455 op0 = copy_to_mode_reg (SImode, op0);
14457 op1 = copy_to_mode_reg (SImode, op1);
14459 op2 = copy_to_mode_reg (SImode, op2);
14460 emit_insn (gen_monitor (op0, op1, op2));
14463 case IX86_BUILTIN_MWAIT:
14464 arg0 = TREE_VALUE (arglist);
14465 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14466 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14467 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14469 op0 = copy_to_mode_reg (SImode, op0);
14471 op1 = copy_to_mode_reg (SImode, op1);
14472 emit_insn (gen_mwait (op0, op1));
14475 case IX86_BUILTIN_LOADDDUP:
14476 return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
14478 case IX86_BUILTIN_LDDQU:
14479 return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
14486 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14487 if (d->code == fcode)
14489 /* Compares are treated specially. */
14490 if (d->icode == CODE_FOR_maskcmpv4sf3
14491 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14492 || d->icode == CODE_FOR_maskncmpv4sf3
14493 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14494 || d->icode == CODE_FOR_maskcmpv2df3
14495 || d->icode == CODE_FOR_vmmaskcmpv2df3
14496 || d->icode == CODE_FOR_maskncmpv2df3
14497 || d->icode == CODE_FOR_vmmaskncmpv2df3)
14498 return ix86_expand_sse_compare (d, arglist, target);
14500 return ix86_expand_binop_builtin (d->icode, arglist, target);
14503 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14504 if (d->code == fcode)
14505 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14507 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14508 if (d->code == fcode)
14509 return ix86_expand_sse_comi (d, arglist, target);
14511 /* @@@ Should really do something sensible here. */
14515 /* Store OPERAND to the memory after reload is completed.  This means
14516    that we can't easily use assign_stack_local.  */
/* Returns a MEM rtx through which OPERAND can be reloaded.  Strategy
   depends on target: use the red zone below the stack pointer when
   available, otherwise push onto the stack with PRE_DEC addressing.
   Pair each call with ix86_free_from_memory to pop the slot.  */
14518 ix86_force_to_memory (enum machine_mode mode, rtx operand)
/* Only valid post-reload; assign_stack_local is unusable here.  */
14521   if (!reload_completed)
14523   if (TARGET_RED_ZONE)
/* Red zone: store into the 128-byte area below the stack pointer
   without adjusting it.  */
14525       result = gen_rtx_MEM (mode,
14526 			    gen_rtx_PLUS (Pmode,
14528 					  GEN_INT (-RED_ZONE_SIZE)));
14529       emit_move_insn (result, operand);
14531   else if (!TARGET_RED_ZONE && TARGET_64BIT)
/* 64-bit without red zone: push a DImode-widened copy.  */
14537 	  operand = gen_lowpart (DImode, operand);
14541 		     gen_rtx_SET (VOIDmode,
14542 				  gen_rtx_MEM (DImode,
14543 					       gen_rtx_PRE_DEC (DImode,
14544 							        stack_pointer_rtx)),
14550 	  result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* 32-bit DImode value: split into two SImode halves and push both.  */
14559 	    split_di (&operand, 1, operands, operands + 1);
14561 		       gen_rtx_SET (VOIDmode,
14562 				    gen_rtx_MEM (SImode,
14563 						 gen_rtx_PRE_DEC (Pmode,
14564 							          stack_pointer_rtx)),
14567 		       gen_rtx_SET (VOIDmode,
14568 				    gen_rtx_MEM (SImode,
14569 						 gen_rtx_PRE_DEC (Pmode,
14570 							          stack_pointer_rtx)),
14575 	    /* It is better to store HImodes as SImodes.  */
14576 	    if (!TARGET_PARTIAL_REG_STALL)
14577 	      operand = gen_lowpart (SImode, operand);
14581 		       gen_rtx_SET (VOIDmode,
14582 				    gen_rtx_MEM (GET_MODE (operand),
14583 						 gen_rtx_PRE_DEC (SImode,
14584 							          stack_pointer_rtx)),
14590 	    result = gen_rtx_MEM (mode, stack_pointer_rtx);
14595 /* Free operand from the memory.  */
/* Undo ix86_force_to_memory: deallocate the temporary stack slot.
   A no-op when the red zone was used, since no stack adjustment was
   made there.  Slot size mirrors the push logic in
   ix86_force_to_memory (DImode/64-bit vs. HImode-stored-as-SImode).  */
14597 ix86_free_from_memory (enum machine_mode mode)
14599   if (!TARGET_RED_ZONE)
14603       if (mode == DImode || TARGET_64BIT)
14605       else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14609       /* Use LEA to deallocate stack space.  In peephole2 it will be converted
14610 	 to pop or add instruction if registers are available.  */
14611       emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14612 			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14617 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14618    QImode must go into class Q_REGS.
14619    Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
14620    movdf to do mem-to-mem moves through integer regs.  */
/* Implementation of PREFERRED_RELOAD_CLASS: given value X to be
   reloaded into CLASS, return the (possibly narrower) class to
   actually use.  */
14622 ix86_preferred_reload_class (rtx x, enum reg_class class)
/* Non-zero vector constants cannot be materialized in registers.  */
14624   if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14626   if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14628       /* SSE can't load any constant directly yet.  */
14629       if (SSE_CLASS_P (class))
14631       /* Floats can load 0 and 1.  */
14632       if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14634 	  /* Limit class to non-SSE.  Use GENERAL_REGS if possible.  */
14635 	  if (MAYBE_SSE_CLASS_P (class))
14636 	    return (reg_class_subset_p (class, GENERAL_REGS)
14637 		    ? GENERAL_REGS : FLOAT_REGS);
14641       /* General regs can load everything.  */
14642       if (reg_class_subset_p (class, GENERAL_REGS))
14643 	return GENERAL_REGS;
14644       /* In case we haven't resolved FLOAT or SSE yet, give up.  */
14645       if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
/* MMX registers cannot hold constants either.  */
14648   if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
/* QImode values must end up in a byte-addressable register.  */
14650   if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14655 /* If we are copying between general and FP registers, we need a memory
14656    location.  The same is true for SSE and MMX registers.
14658    The macro can't work reliably when one of the CLASSES is class containing
14659    registers from multiple units (SSE, MMX, integer).  We avoid this by never
14660    combining those units in single alternative in the machine description.
14661    Ensure that this constraint holds to avoid unexpected surprises.
14663    When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14664    enforce these sanity checks.  */
/* Returns nonzero when a move between CLASS1 and CLASS2 in MODE must
   go through memory.  */
14666 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
14667 			      enum machine_mode mode, int strict)
/* Sanity check: each class must be unambiguously one register unit
   (see the comment above).  Skipped in the non-strict (cost) case.  */
14669   if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14670       || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14671       || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14672       || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14673       || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14674       || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
/* x87 <-> anything-else always needs memory; SSE/MMX <-> integer needs
   memory unless direct inter-unit moves are possible and enabled.  */
14681   return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14682 	  || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14683 	       || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14684 	      && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14685 		  || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
14687 /* Return the cost of moving data from a register in class CLASS1 to
14688    one in class CLASS2.
14690    It is not required that the cost always equal 2 when FROM is the same as TO;
14691    on some machines it is expensive to move between registers if they are not
14692    general registers.  */
14694 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
14695 			 enum reg_class class2)
14697   /* In case we require secondary memory, compute cost of the store followed
14698      by load.  In order to avoid bad register allocation choices, we need
14699      for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
/* STRICT is 0 here: called from REGISTER_MOVE_COST, sanity checks off.  */
14701   if (ix86_secondary_memory_needed (class1, class2, mode, 0))
/* Worst-case of load vs. store cost for each side of the round trip.  */
14705       cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14706 		   MEMORY_MOVE_COST (mode, class1, 1));
14707       cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14708 		   MEMORY_MOVE_COST (mode, class2, 1));
14710       /* In case of copying from general_purpose_register we may emit multiple
14711          stores followed by single load causing memory size mismatch stall.
14712          Count this as arbitrarily high cost of 20.  */
14713       if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14716   /* In the case of FP/MMX moves, the registers actually overlap, and we
14717      have to switch modes in order to treat them differently.  */
14718   if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14719       || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14725   /* Moves between SSE/MMX and integer unit are expensive.  */
14726   if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14727       || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14728     return ix86_cost->mmxsse_to_integer;
/* Same-unit moves: pick the per-unit cost from the active cost table.  */
14729   if (MAYBE_FLOAT_CLASS_P (class1))
14730     return ix86_cost->fp_move;
14731   if (MAYBE_SSE_CLASS_P (class1))
14732     return ix86_cost->sse_move;
14733   if (MAYBE_MMX_CLASS_P (class1))
14734     return ix86_cost->mmx_move;
14738 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */
14740 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
14742   /* Flags and only flags can only hold CCmode values.  */
14743   if (CC_REGNO_P (regno))
14744     return GET_MODE_CLASS (mode) == MODE_CC;
/* CC/RANDOM/PARTIAL_INT modes are rejected for every other register.  */
14745   if (GET_MODE_CLASS (mode) == MODE_CC
14746       || GET_MODE_CLASS (mode) == MODE_RANDOM
14747       || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
/* Per-unit mode validity: x87, SSE (gated on TARGET_SSE), MMX (gated
   on the relevant MMX/3DNow! target flags).  */
14749   if (FP_REGNO_P (regno))
14750     return VALID_FP_MODE_P (mode);
14751   if (SSE_REGNO_P (regno))
14752     return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
14753   if (MMX_REGNO_P (regno))
14755 	    ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
14756   /* We handle both integer and floats in the general purpose registers.
14757      In future we should be able to handle vector modes as well.  */
14758   if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14760   /* Take care for QImode values - they can be in non-QI regs, but then
14761      they do cause partial register stalls.  */
14762   if (regno < 4 || mode != QImode || TARGET_64BIT)
/* Allow QImode in non-Q regs during/after reload, or when the target
   does not suffer partial register stalls.  */
14764   return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14767 /* Return the cost of moving data of mode M between a
14768    register and memory.  A value of 2 is the default; this cost is
14769    relative to those in `REGISTER_MOVE_COST'.
14771    If moving between registers and memory is more expensive than
14772    between two registers, you should define this macro to express the
14775    Model also increased moving costs of QImode registers in non
14779    ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
/* x87 classes: cost indexed by mode size from the fp_load/fp_store
   tables; IN selects load vs. store.  */
14781   if (FLOAT_CLASS_P (class))
14798       return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
/* SSE classes: same scheme with the sse_load/sse_store tables.  */
14800   if (SSE_CLASS_P (class))
14803       switch (GET_MODE_SIZE (mode))
14817       return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
/* MMX classes: mmx_load/mmx_store tables.  */
14819   if (MMX_CLASS_P (class))
14822       switch (GET_MODE_SIZE (mode))
14833       return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
/* Integer registers, dispatched on mode size.  */
14835   switch (GET_MODE_SIZE (mode))
/* Byte access outside Q_REGS needs movzbl (load) or carries a
   partial-register-stall penalty (store).  */
14839 	return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14840 		: ix86_cost->movzbl_load);
14842 	return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14843 		: ix86_cost->int_store[0] + 4);
14846       return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14848       /* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
14849       if (mode == TFmode)
14851       return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14852 	      * (((int) GET_MODE_SIZE (mode)
14853 		  + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
14857 /* Compute a (partial) cost for rtx X.  Return true if the complete
14858    cost has been computed, and false if subexpressions should be
14859    scanned.  In either case, *TOTAL contains the cost result.  */
14862 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
14864   enum machine_mode mode = GET_MODE (x);
/* Integer constants: cost depends on whether the value fits the
   sign-/zero-extended immediate forms on x86-64, and on PIC
   references to non-local symbols.  */
14872       if (TARGET_64BIT && !x86_64_sign_extended_value (x))
14874       else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
14876       else if (flag_pic && SYMBOLIC_CONST (x)
/* NOTE(review): `!GET_CODE (x) != LABEL_REF` looks wrong --
   presumably `GET_CODE (x) != LABEL_REF` was intended; verify
   against upstream.  */
14878 		    || (!GET_CODE (x) != LABEL_REF
14879 			&& (GET_CODE (x) != SYMBOL_REF
14880 			    || !SYMBOL_REF_LOCAL_P (x)))))
/* FP constants: standard 387 constants (0.0/1.0/pi etc.) are cheap.  */
14887       if (mode == VOIDmode)
14890       switch (standard_80387_constant_p (x))
14895 	default: /* Other constants */
14900 	  /* Start with (MEM (SYMBOL_REF)), since that's where
14901 	     it'll probably end up.  Add a penalty for size.  */
14902 	  *total = (COSTS_N_INSNS (1)
14903 		    + (flag_pic != 0 && !TARGET_64BIT)
14904 		    + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
14910       /* The zero extensions is often completely free on x86_64, so make
14911 	 it as cheap as possible.  */
14912       if (TARGET_64BIT && mode == DImode
14913 	  && GET_MODE (XEXP (x, 0)) == SImode)
14915       else if (TARGET_ZERO_EXTEND_WITH_AND)
14916 	*total = COSTS_N_INSNS (ix86_cost->add);
14918 	*total = COSTS_N_INSNS (ix86_cost->movzx);
14922       *total = COSTS_N_INSNS (ix86_cost->movsx);
/* Shifts by a constant: small left shifts may be done with LEA when
   that is at least as cheap as the shift itself.  */
14926       if (GET_CODE (XEXP (x, 1)) == CONST_INT
14927 	  && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
14929 	  HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14932 	      *total = COSTS_N_INSNS (ix86_cost->add);
14935 	  if ((value == 2 || value == 3)
14936 	      && !TARGET_DECOMPOSE_LEA
14937 	      && ix86_cost->lea <= ix86_cost->shift_const)
14939 	      *total = COSTS_N_INSNS (ix86_cost->lea);
/* 32-bit DImode shifts need an instruction pair (or worse when the
   count is variable).  */
14949       if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
14951 	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14953 	      if (INTVAL (XEXP (x, 1)) > 32)
14954 		*total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
14956 		*total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
14960 	      if (GET_CODE (XEXP (x, 1)) == AND)
14961 		*total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
14963 		*total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
14968 	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14969 	    *total = COSTS_N_INSNS (ix86_cost->shift_const);
14971 	    *total = COSTS_N_INSNS (ix86_cost->shift_var);
/* Multiply: FP uses fmul cost; integer-by-constant cost grows with
   the number of set bits in the multiplier.  */
14976       if (FLOAT_MODE_P (mode))
14977 	*total = COSTS_N_INSNS (ix86_cost->fmul);
14978       else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14980 	  unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14983 	  for (nbits = 0; value != 0; value >>= 1)
14986 	  *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
14987 				  + nbits * ix86_cost->mult_bit);
14991 	  /* This is arbitrary */
14992 	  *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
14993 				  + 7 * ix86_cost->mult_bit);
14001       if (FLOAT_MODE_P (mode))
15002 	*total = COSTS_N_INSNS (ix86_cost->fdiv);
15004 	*total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
/* PLUS: patterns matching an LEA address (base + index*scale + disp)
   are costed as a single LEA plus operand costs.  */
15008       if (FLOAT_MODE_P (mode))
15009 	*total = COSTS_N_INSNS (ix86_cost->fadd);
15010       else if (!TARGET_DECOMPOSE_LEA
15011 	       && GET_MODE_CLASS (mode) == MODE_INT
15012 	       && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
15014 	  if (GET_CODE (XEXP (x, 0)) == PLUS
15015 	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15016 	      && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
15017 	      && CONSTANT_P (XEXP (x, 1)))
15019 	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
15020 	      if (val == 2 || val == 4 || val == 8)
15022 		  *total = COSTS_N_INSNS (ix86_cost->lea);
15023 		  *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15024 		  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
15026 		  *total += rtx_cost (XEXP (x, 1), outer_code);
15030 	  else if (GET_CODE (XEXP (x, 0)) == MULT
15031 		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
15033 	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
15034 	      if (val == 2 || val == 4 || val == 8)
15036 		  *total = COSTS_N_INSNS (ix86_cost->lea);
15037 		  *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15038 		  *total += rtx_cost (XEXP (x, 1), outer_code);
15042 	  else if (GET_CODE (XEXP (x, 0)) == PLUS)
15044 	      *total = COSTS_N_INSNS (ix86_cost->lea);
15045 	      *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15046 	      *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15047 	      *total += rtx_cost (XEXP (x, 1), outer_code);
15054       if (FLOAT_MODE_P (mode))
15056 	  *total = COSTS_N_INSNS (ix86_cost->fadd);
/* 32-bit DImode add/sub: two word operations, doubling the cost of
   any operand not already DImode.  */
15064       if (!TARGET_64BIT && mode == DImode)
15066 	  *total = (COSTS_N_INSNS (ix86_cost->add) * 2
15067 		    + (rtx_cost (XEXP (x, 0), outer_code)
15068 		       << (GET_MODE (XEXP (x, 0)) != DImode))
15069 		    + (rtx_cost (XEXP (x, 1), outer_code)
15070 		       << (GET_MODE (XEXP (x, 1)) != DImode)));
15076       if (FLOAT_MODE_P (mode))
15078 	  *total = COSTS_N_INSNS (ix86_cost->fchs);
15084       if (!TARGET_64BIT && mode == DImode)
15085 	*total = COSTS_N_INSNS (ix86_cost->add * 2);
15087 	*total = COSTS_N_INSNS (ix86_cost->add);
15091       if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
15096       if (FLOAT_MODE_P (mode))
15097 	*total = COSTS_N_INSNS (ix86_cost->fabs);
15101       if (FLOAT_MODE_P (mode))
15102 	*total = COSTS_N_INSNS (ix86_cost->fsqrt);
/* Thread-pointer UNSPEC handled specially.  */
15106       if (XINT (x, 1) == UNSPEC_TP)
15115 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
/* SVR3 constructor hook: emit a "pushl $SYMBOL" into the init
   section so the startup code can call the constructor.  PRIORITY is
   ignored.  */
15117 ix86_svr3_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
15120   fputs ("\tpushl $", asm_out_file);
15121   assemble_name (asm_out_file, XSTR (symbol, 0));
15122   fputc ('\n', asm_out_file);
/* Counter used to generate unique local labels (LPC$n / Ln$lz) for
   Mach-O symbol stubs.  */
15128 static int current_machopic_label_num;
15130 /* Given a symbol name and its associated stub, write out the
15131    definition of the stub.  */
/* Emits three pieces to FILE: the stub itself, the binder that calls
   dyld_stub_binding_helper on first use, and the lazy symbol pointer
   slot that the binder patches.  PIC and non-PIC variants are
   emitted depending on the (elided) MACHOPIC condition.  */
15134 machopic_output_stub (FILE *file, const char *symb, const char *stub)
15136   unsigned int length;
15137   char *binder_name, *symbol_name, lazy_ptr_name[32];
15138   int label = ++current_machopic_label_num;
15140   /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
15141   symb = (*targetm.strip_name_encoding) (symb);
15143   length = strlen (stub);
15144   binder_name = alloca (length + 32);
15145   GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
15147   length = strlen (symb);
15148   symbol_name = alloca (length + 32);
15149   GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
15151   sprintf (lazy_ptr_name, "L%d$lz", label);
15154     machopic_picsymbol_stub_section ();
15156     machopic_symbol_stub_section ();
15158   fprintf (file, "%s:\n", stub);
15159   fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
/* PIC stub: compute the current address via call/pop, then jump
   through the lazy pointer relative to it.  */
15163       fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
15164       fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
15165       fprintf (file, "\tjmp %%edx\n");
15168     fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
/* Binder: push the lazy-pointer address and enter dyld's binding
   helper, which resolves the symbol and updates the pointer.  */
15170   fprintf (file, "%s:\n", binder_name);
15174       fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
15175       fprintf (file, "\tpushl %%eax\n");
15178     fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
15180   fprintf (file, "\tjmp dyld_stub_binding_helper\n");
/* Lazy symbol pointer: initially points at the binder.  */
15182   machopic_lazy_symbol_ptr_section ();
15183   fprintf (file, "%s:\n", lazy_ptr_name);
15184   fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15185   fprintf (file, "\t.long %s\n", binder_name);
15187 #endif /* TARGET_MACHO */
15189 /* Order the registers for register allocator.  */
/* Fills reg_alloc_order[]: call-clobbered GPRs first, then
   call-saved GPRs, then x87 or SSE depending on whether FP math is
   done on the x87 stack or in SSE registers, then MMX.  */
15192 x86_order_regs_for_local_alloc (void)
15197    /* First allocate the local general purpose registers.  */
15198   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15199     if (GENERAL_REGNO_P (i) && call_used_regs[i])
15200       reg_alloc_order [pos++] = i;
15202    /* Global general purpose registers.  */
15203   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15204     if (GENERAL_REGNO_P (i) && !call_used_regs[i])
15205       reg_alloc_order [pos++] = i;
15207   /* x87 registers come first in case we are doing FP math
15209   if (!TARGET_SSE_MATH)
15210     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15211       reg_alloc_order [pos++] = i;
15213   /* SSE registers.  */
15214   for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
15215     reg_alloc_order [pos++] = i;
15216   for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
15217     reg_alloc_order [pos++] = i;
15219   /* x87 registers.  */
15220   if (TARGET_SSE_MATH)
15221     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15222       reg_alloc_order [pos++] = i;
15224   for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
15225     reg_alloc_order [pos++] = i;
15227   /* Initialize the rest of array as we do not allocate some registers
/* Remaining slots padded with register 0.  */
15229   while (pos < FIRST_PSEUDO_REGISTER)
15230     reg_alloc_order [pos++] = 0;
/* Default: do not use MS-compatible bitfield layout unless the target
   configuration overrides this macro.  */
15233 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15234 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
15237 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15238 struct attribute_spec.handler. */
/* Validates that the attribute is applied to a struct/union type and
   that it does not conflict with the opposite attribute already present;
   on any problem a warning is issued and *no_add_attrs is set so the
   attribute is dropped.
   NOTE(review): the return type, braces, and the `tree *type = node;`
   default appear elided in this excerpt -- confirm against full source.  */
15240 ix86_handle_struct_attribute (tree *node, tree name,
15241 tree args ATTRIBUTE_UNUSED,
15242 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
/* If attached to a TYPE_DECL, look through to the underlying type.  */
15245 if (DECL_P (*node))
15247 if (TREE_CODE (*node) == TYPE_DECL)
15248 type = &TREE_TYPE (*node);
/* Only RECORD_TYPE and UNION_TYPE may carry these attributes.  */
15253 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15254 || TREE_CODE (*type) == UNION_TYPE)))
15256 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
15257 *no_add_attrs = true;
/* Reject ms_struct on a type already marked gcc_struct and vice versa.  */
15260 else if ((is_attribute_p ("ms_struct", name)
15261 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15262 || ((is_attribute_p ("gcc_struct", name)
15263 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15265 warning ("`%s' incompatible attribute ignored",
15266 IDENTIFIER_POINTER (name));
15267 *no_add_attrs = true;
/* Return nonzero when RECORD_TYPE should be laid out with MS-compatible
   bitfield rules: either the target default requests it (and the type is
   not explicitly marked gcc_struct), or the type is marked ms_struct.  */
15274 ix86_ms_bitfield_layout_p (tree record_type)
15276 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
15277 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
15278 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
15281 /* Returns an expression indicating where the this parameter is
15282 located on entry to the FUNCTION. */
/* Returns either a hard REG (64-bit ABI, or 32-bit regparm/fastcall
   conventions) or a MEM at a stack-pointer offset (plain 32-bit ABI).
   NOTE(review): the 64-bit guard, the regparm branch body and the locals
   appear elided in this excerpt -- confirm against full source.  */
15285 x86_this_parameter (tree function)
15287 tree type = TREE_TYPE (function);
/* 64-bit: `this' is in the first (or second, when a hidden aggregate
   return pointer occupies the first) integer parameter register.  */
15291 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
15292 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
15295 if (ix86_function_regparm (type, function) > 0)
15299 parm = TYPE_ARG_TYPES (type);
15300 /* Figure out whether or not the function has a variable number of
/* A prototype ending in void_type_node means a fixed argument list,
   so register-passing conventions may apply.  */
15302 for (; parm; parm = TREE_CHAIN (parm))
15303 if (TREE_VALUE (parm) == void_type_node)
15305 /* If not, the this parameter is in the first argument. */
15309 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
15311 return gen_rtx_REG (SImode, regno);
/* Stack-passed: skip the return address (4 bytes) and, for aggregate
   returns, the hidden return-slot pointer (4 more bytes).  */
15315 if (aggregate_value_p (TREE_TYPE (type), type))
15316 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
15318 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
15321 /* Determine whether x86_output_mi_thunk can succeed. */
/* Returns true when a "mi" (multiple-inheritance) thunk can be emitted:
   64-bit always can; 32-bit needs a scratch register free of parameter
   and PIC duties.  NOTE(review): the early `return true` statements for
   each branch are elided in this excerpt.  */
15324 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
15325 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
15326 HOST_WIDE_INT vcall_offset, tree function)
15328 /* 64-bit can handle anything. */
15332 /* For 32-bit, everything's fine if we have one free register. */
15333 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
15336 /* Need a free register for vcall_offset. */
15340 /* Need a free register for GOT references. */
15341 if (flag_pic && !(*targetm.binds_local_p) (function))
15344 /* Otherwise ok. */
15348 /* Output the assembler code for a thunk function. THUNK_DECL is the
15349 declaration for the thunk function itself, FUNCTION is the decl for
15350 the target function. DELTA is an immediate constant offset to be
15351 added to THIS. If VCALL_OFFSET is nonzero, the word at
15352 *(*this + vcall_offset) should be added to THIS. */
/* Emits textual asm directly via output_asm_insn: adjust `this' by DELTA
   and/or the vtable slot at VCALL_OFFSET, then tail-jump to FUNCTION
   (direct, via GOT, or via a Mach-O stub).
   NOTE(review): many guard lines (TARGET_64BIT tests, braces, else arms)
   are elided in this excerpt; the comments below describe only what the
   visible lines establish.  */
15355 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
15356 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
15357 HOST_WIDE_INT vcall_offset, tree function)
15360 rtx this = x86_this_parameter (function);
15363 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15364 pull it in now and let DELTA benefit. */
15367 else if (vcall_offset)
15369 /* Put the this parameter into %eax. */
15371 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15372 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15375 this_reg = NULL_RTX;
15377 /* Adjust the this parameter by a fixed constant. */
15380 xops[0] = GEN_INT (delta);
15381 xops[1] = this_reg ? this_reg : this;
/* 64-bit: DELTA may not fit an immediate; stage it through R10.  */
15384 if (!x86_64_general_operand (xops[0], DImode))
15386 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15388 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15392 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15395 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15398 /* Adjust the this parameter by a value stored in the vtable. */
15402 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
/* 32-bit scratch: ECX by default; EAX when fastcall claims ECX.  */
15405 int tmp_regno = 2 /* ECX */;
15406 if (lookup_attribute ("fastcall",
15407 TYPE_ATTRIBUTES (TREE_TYPE (function))))
15408 tmp_regno = 0 /* EAX */;
15409 tmp = gen_rtx_REG (SImode, tmp_regno);
/* Load the vtable pointer (*this) into the scratch register.  */
15412 xops[0] = gen_rtx_MEM (Pmode, this_reg);
15415 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15417 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15419 /* Adjust the this parameter. */
15420 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
/* 64-bit: a too-large VCALL_OFFSET needs a second scratch, R11.  */
15421 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15423 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15424 xops[0] = GEN_INT (vcall_offset);
15426 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15427 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
15429 xops[1] = this_reg;
15431 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15433 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15436 /* If necessary, drop THIS back to its stack slot. */
15437 if (this_reg && this_reg != this)
15439 xops[0] = this_reg;
15441 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
/* Finally, tail-call the real FUNCTION.  */
15444 xops[0] = XEXP (DECL_RTL (function), 0);
15447 if (!flag_pic || (*targetm.binds_local_p) (function))
15448 output_asm_insn ("jmp\t%P0", xops);
/* 64-bit PIC external call: jump through the GOTPCREL slot.  */
15451 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
15452 tmp = gen_rtx_CONST (Pmode, tmp);
15453 tmp = gen_rtx_MEM (QImode, tmp);
15455 output_asm_insn ("jmp\t%A0", xops);
15460 if (!flag_pic || (*targetm.binds_local_p) (function))
15461 output_asm_insn ("jmp\t%P0", xops);
/* Darwin (TARGET_MACHO): jump via the machopic symbol stub.  */
15466 const char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
15467 tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
15468 tmp = gen_rtx_MEM (QImode, tmp);
15470 output_asm_insn ("jmp\t%0", xops);
15473 #endif /* TARGET_MACHO */
/* 32-bit PIC ELF: materialize the GOT pointer in ECX, then do an
   indirect jump through the function's GOT entry.  */
15475 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15476 output_set_got (tmp);
15479 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15480 output_asm_insn ("jmp\t{*}%1", xops);
/* Emit the standard file prologue, then any x86-specific directives:
   an optional `.version' tag, the `__fltused' global some runtimes
   expect when floating point is referenced, and `.intel_syntax' when
   -masm=intel is in effect.  */
15486 x86_file_start (void)
15488 default_file_start ();
15489 if (X86_FILE_START_VERSION_DIRECTIVE)
15490 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
15491 if (X86_FILE_START_FLTUSED)
15492 fputs ("\t.global\t__fltused\n", asm_out_file);
15493 if (ix86_asm_dialect == ASM_INTEL)
15494 fputs ("\t.intel_syntax\n", asm_out_file);
/* Return the alignment to use for FIELD whose natural alignment is
   COMPUTED bits.  Caps integer, double and complex fields at 32 bits for
   the traditional i386 ABI; 64-bit and -malign-double targets keep the
   computed value.  NOTE(review): the `return computed;` fall-through
   lines appear elided in this excerpt.  */
15498 x86_field_alignment (tree field, int computed)
15500 enum machine_mode mode;
15501 tree type = TREE_TYPE (field);
15503 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
/* For arrays, alignment is governed by the innermost element type.  */
15505 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15506 ? get_inner_array_type (type) : type);
15507 if (mode == DFmode || mode == DCmode
15508 || GET_MODE_CLASS (mode) == MODE_INT
15509 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
15510 return MIN (32, computed);
15514 /* Output assembler code to FILE to increment profiler label # LABELNO
15515 for profiling a function entry. */
/* Four variants are visible: 64-bit PIC, 64-bit non-PIC, 32-bit PIC and
   32-bit non-PIC; each optionally loads the per-call counter label when
   NO_PROFILE_COUNTERS is not defined, then calls MCOUNT_NAME.
   NOTE(review): the TARGET_64BIT/flag_pic selection lines are elided in
   this excerpt.  */
15517 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
/* 64-bit PIC: RIP-relative counter address in R11, indirect mcount
   call through the GOT.  */
15522 #ifndef NO_PROFILE_COUNTERS
15523 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
15525 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
/* 64-bit non-PIC: absolute counter address, direct call.  */
15529 #ifndef NO_PROFILE_COUNTERS
15530 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15532 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
/* 32-bit PIC: GOTOFF counter address relative to %ebx, call via GOT.  */
15536 #ifndef NO_PROFILE_COUNTERS
15537 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15538 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15540 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
/* 32-bit non-PIC: absolute address, direct call.  */
15544 #ifndef NO_PROFILE_COUNTERS
15545 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
15546 PROFILE_COUNT_REGISTER);
15548 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15552 /* We don't have exact information about the insn sizes, but we may assume
15553 quite safely that we are informed about all 1 byte insns and memory
15554 address sizes. This is enough to eliminate unnecessary padding in
/* Conservative lower bound (in bytes) on the encoded size of INSN, used
   by the K8 jump-padding pass below.  NOTE(review): several `return N;`
   lines and the final fallback return appear elided in this excerpt.  */
15558 min_insn_size (rtx insn)
/* Non-insns and inactive insns occupy no bytes.  */
15562 if (!INSN_P (insn) || !active_insn_p (insn))
15565 /* Discard alignments we've emit and jump instructions. */
15566 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
15567 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
/* Jump tables are data, not executed bytes in the window.  */
15569 if (GET_CODE (insn) == JUMP_INSN
15570 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
15571 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
15574 /* Important case - calls are always 5 bytes.
15575 It is common to have many calls in the row. */
15576 if (GET_CODE (insn) == CALL_INSN
15577 && symbolic_reference_mentioned_p (PATTERN (insn))
15578 && !SIBLING_CALL_P (insn))
/* Trust exact 1-byte length info when the attribute reports it.  */
15580 if (get_attr_length (insn) <= 1)
15583 /* For normal instructions we may rely on the sizes of addresses
15584 and the presence of symbol to require 4 bytes of encoding.
15585 This is not the case for jumps where references are PC relative. */
15586 if (GET_CODE (insn) != JUMP_INSN)
15588 l = get_attr_length_address (insn);
15589 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
15598 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
/* Sliding-window pass: find intervals holding four branch-type insns
   whose total estimated size fits in under 16 bytes, and emit an align
   insn before the fourth so they cannot share one 16-byte fetch window.
   NOTE(review): the `isjump` declaration and several brace/guard lines
   are elided in this excerpt.  */
15602 k8_avoid_jump_misspredicts (void)
15604 rtx insn, start = get_insns ();
15605 int nbytes = 0, njumps = 0;
15608 /* Look for all minimal intervals of instructions containing 4 jumps.
15609 The intervals are bounded by START and INSN. NBYTES is the total
15610 size of instructions in the interval including INSN and not including
15611 START. When the NBYTES is smaller than 16 bytes, it is possible
15612 that the end of START and INSN ends up in the same 16byte page.
15614 The smallest offset in the page INSN can start is the case where START
15615 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
15616 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
15618 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
15621 nbytes += min_insn_size (insn);
/* Dump-file tracing of the per-insn size estimate.  */
15623 fprintf(rtl_dump_file, "Insn %i estimated to %i bytes\n",
15624 INSN_UID (insn), min_insn_size (insn));
/* Count INSN if it is a real (non-table) jump or a call.  */
15625 if ((GET_CODE (insn) == JUMP_INSN
15626 && GET_CODE (PATTERN (insn)) != ADDR_VEC
15627 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
15628 || GET_CODE (insn) == CALL_INSN
/* Shrink the window from the front until at most 3 jumps remain.  */
15635 start = NEXT_INSN (start);
15636 if ((GET_CODE (start) == JUMP_INSN
15637 && GET_CODE (PATTERN (start)) != ADDR_VEC
15638 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
15639 || GET_CODE (start) == CALL_INSN)
15640 njumps--, isjump = 1;
15643 nbytes -= min_insn_size (start);
15648 fprintf(rtl_dump_file, "Interval %i to %i has %i bytes\n",
15649 INSN_UID (start), INSN_UID (insn), nbytes);
/* Four branchy insns in < 16 bytes: pad so INSN starts a new window.  */
15651 if (njumps == 3 && isjump && nbytes < 16)
15653 int padsize = 15 - nbytes + min_insn_size (insn);
15656 fprintf (rtl_dump_file, "Padding insn %i by %i bytes!\n", INSN_UID (insn), padsize);
15657 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
15662 /* Implement machine specific optimizations.
15663 At the moment we implement single transformation: AMD Athlon works faster
15664 when RET is not destination of conditional jump or directly preceded
15665 by other jump instruction. We avoid the penalty by inserting NOP just
15666 before the RET instructions in such cases. */
/* NOTE(review): the function declaration line is elided in this excerpt
   (presumably the ix86_reorg machine-dependent reorg hook -- confirm).
   Visible logic: for each predecessor edge of the exit block, decide
   whether its final RET needs a longer encoding / preceding NOP, then
   run the K8 jump-misprediction padding pass.  */
/* Only relevant when tuning for Athlon/K8, optimizing, and not for size.  */
15672 if (!TARGET_ATHLON_K8 || !optimize || optimize_size)
15674 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
15676 basic_block bb = e->src;
15677 rtx ret = BB_END (bb);
15679 bool replace = false;
/* Skip blocks not ending in a RETURN jump, and cold blocks.  */
15681 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
15682 || !maybe_hot_bb_p (bb))
/* Walk back to the nearest active insn or label before the RET.  */
15684 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15685 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
/* RET preceded by a label: replace if any non-fallthru hot edge
   (i.e. a jump) targets it.  */
15687 if (prev && GET_CODE (prev) == CODE_LABEL)
15690 for (e = bb->pred; e; e = e->pred_next)
15691 if (EDGE_FREQUENCY (e) && e->src->index >= 0
15692 && !(e->flags & EDGE_FALLTHRU))
/* RET directly preceded by a conditional jump or a call also pays
   the penalty.  */
15697 prev = prev_active_insn (ret);
15699 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
15700 || GET_CODE (prev) == CALL_INSN))
15702 /* Empty functions get branch mispredict even when the jump destination
15703 is not visible to us. */
15704 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
/* Emit the long-encoding return in place of the plain RET.  */
15709 emit_insn_before (gen_return_internal_long (), ret);
15713 k8_avoid_jump_misspredicts ();
15716 /* Return nonzero when QImode register that must be represented via REX prefix
/* Scans the cached operand extraction of INSN for a register operand
   with hard register number >= 4 -- in QImode those byte registers are
   only encodable with a REX prefix on x86-64.
   NOTE(review): the QImode check on the operand and the return
   statements appear elided in this excerpt.  */
15719 x86_extended_QIreg_mentioned_p (rtx insn)
15722 extract_insn_cached (insn);
15723 for (i = 0; i < recog_data.n_operands; i++)
15724 if (REG_P (recog_data.operand[i])
15725 && REGNO (recog_data.operand[i]) >= 4)
15730 /* Return nonzero when P points to register encoded via REX prefix.
15731 Called via for_each_rtx. */
/* for_each_rtx callback: nonzero stops the walk, so a hit on any
   REX-only integer or SSE register terminates the search.
   NOTE(review): the REG_P guard on *p appears elided in this excerpt.  */
15733 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
15735 unsigned int regno;
15738 regno = REGNO (*p);
15739 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15742 /* Return true when INSN mentions register that must be encoded using REX
/* Thin wrapper: walk INSN's pattern with extended_reg_mentioned_1;
   nonzero iff some REX-only register appears anywhere in it.  */
15745 x86_extended_reg_mentioned_p (rtx insn)
15747 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15750 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
15751 optabs would emit if we didn't have TFmode patterns. */
/* operands[0] = FP destination, operands[1] = unsigned integer source.
   Non-negative inputs convert with a plain signed FLOAT; negative
   (high-bit-set) inputs are halved with the lost low bit OR-ed back in,
   converted, then doubled -- the classic unsigned->float idiom.
   NOTE(review): the abort() on unexpected modes and the `out = ...`
   initialization appear elided in this excerpt.  */
15754 x86_emit_floatuns (rtx operands[2])
15756 rtx neglab, donelab, i0, i1, f0, in, out;
15757 enum machine_mode mode, inmode;
15759 inmode = GET_MODE (operands[1]);
/* Only SImode and DImode inputs are handled.  */
15760 if (inmode != SImode
15761 && inmode != DImode)
15765 in = force_reg (inmode, operands[1]);
15766 mode = GET_MODE (out);
15767 neglab = gen_label_rtx ();
15768 donelab = gen_label_rtx ();
15769 i1 = gen_reg_rtx (Pmode);
15770 f0 = gen_reg_rtx (mode);
/* Branch to the slow path when the value is negative as signed.  */
15772 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
/* Fast path: value fits in the signed range, convert directly.  */
15774 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15775 emit_jump_insn (gen_jump (donelab));
15778 emit_label (neglab);
/* Slow path: (in >> 1) | (in & 1), float, then double the result.  */
15780 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15781 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15782 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15783 expand_float (f0, i0, 0);
15784 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15786 emit_label (donelab);
15789 /* Return if we do not know how to pass TYPE solely in registers. */
/* Defers to the generic test first; additionally forces 128-bit integer
   (TImode) aggregates onto the stack in 32-bit mode, where no register
   convention covers them.  NOTE(review): the `return true;` for the
   default case appears elided in this excerpt.  */
15791 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
15793 if (default_must_pass_in_stack (mode, type))
15795 return (!TARGET_64BIT && type && mode == TImode);
15798 #include "gt-i386.h"