1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-attr.h"
43 #include "basic-block.h"
46 #include "target-def.h"
47 #include "langhooks.h"
50 #ifndef CHECK_STACK_LIMIT
51 #define CHECK_STACK_LIMIT (-1)
54 /* Return index of given mode in mult and division cost tables. */
55 #define MODE_INDEX(mode) \
56 ((mode) == QImode ? 0 \
57 : (mode) == HImode ? 1 \
58 : (mode) == SImode ? 2 \
59 : (mode) == DImode ? 3 \
62 /* Processor costs (relative to an add) */
64 struct processor_costs size_cost = { /* costs for tunning for size */
65 2, /* cost of an add instruction */
66 3, /* cost of a lea instruction */
67 2, /* variable shift costs */
68 3, /* constant shift costs */
69 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
70 0, /* cost of multiply per each bit set */
71 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
72 3, /* cost of movsx */
73 3, /* cost of movzx */
76 2, /* cost for loading QImode using movzbl */
77 {2, 2, 2}, /* cost of loading integer registers
78 in QImode, HImode and SImode.
79 Relative to reg-reg move (2). */
80 {2, 2, 2}, /* cost of storing integer registers */
81 2, /* cost of reg,reg fld/fst */
82 {2, 2, 2}, /* cost of loading fp registers
83 in SFmode, DFmode and XFmode */
84 {2, 2, 2}, /* cost of loading integer registers */
85 3, /* cost of moving MMX register */
86 {3, 3}, /* cost of loading MMX registers
87 in SImode and DImode */
88 {3, 3}, /* cost of storing MMX registers
89 in SImode and DImode */
90 3, /* cost of moving SSE register */
91 {3, 3, 3}, /* cost of loading SSE registers
92 in SImode, DImode and TImode */
93 {3, 3, 3}, /* cost of storing SSE registers
94 in SImode, DImode and TImode */
95 3, /* MMX or SSE register to integer */
96 0, /* size of prefetch block */
97 0, /* number of parallel prefetches */
99 2, /* cost of FADD and FSUB insns. */
100 2, /* cost of FMUL instruction. */
101 2, /* cost of FDIV instruction. */
102 2, /* cost of FABS instruction. */
103 2, /* cost of FCHS instruction. */
104 2, /* cost of FSQRT instruction. */
107 /* Processor costs (relative to an add) */
109 struct processor_costs i386_cost = { /* 386 specific costs */
110 1, /* cost of an add instruction */
111 1, /* cost of a lea instruction */
112 3, /* variable shift costs */
113 2, /* constant shift costs */
114 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
115 1, /* cost of multiply per each bit set */
116 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
117 3, /* cost of movsx */
118 2, /* cost of movzx */
119 15, /* "large" insn */
121 4, /* cost for loading QImode using movzbl */
122 {2, 4, 2}, /* cost of loading integer registers
123 in QImode, HImode and SImode.
124 Relative to reg-reg move (2). */
125 {2, 4, 2}, /* cost of storing integer registers */
126 2, /* cost of reg,reg fld/fst */
127 {8, 8, 8}, /* cost of loading fp registers
128 in SFmode, DFmode and XFmode */
129 {8, 8, 8}, /* cost of loading integer registers */
130 2, /* cost of moving MMX register */
131 {4, 8}, /* cost of loading MMX registers
132 in SImode and DImode */
133 {4, 8}, /* cost of storing MMX registers
134 in SImode and DImode */
135 2, /* cost of moving SSE register */
136 {4, 8, 16}, /* cost of loading SSE registers
137 in SImode, DImode and TImode */
138 {4, 8, 16}, /* cost of storing SSE registers
139 in SImode, DImode and TImode */
140 3, /* MMX or SSE register to integer */
141 0, /* size of prefetch block */
142 0, /* number of parallel prefetches */
144 23, /* cost of FADD and FSUB insns. */
145 27, /* cost of FMUL instruction. */
146 88, /* cost of FDIV instruction. */
147 22, /* cost of FABS instruction. */
148 24, /* cost of FCHS instruction. */
149 122, /* cost of FSQRT instruction. */
153 struct processor_costs i486_cost = { /* 486 specific costs */
154 1, /* cost of an add instruction */
155 1, /* cost of a lea instruction */
156 3, /* variable shift costs */
157 2, /* constant shift costs */
158 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
159 1, /* cost of multiply per each bit set */
160 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
161 3, /* cost of movsx */
162 2, /* cost of movzx */
163 15, /* "large" insn */
165 4, /* cost for loading QImode using movzbl */
166 {2, 4, 2}, /* cost of loading integer registers
167 in QImode, HImode and SImode.
168 Relative to reg-reg move (2). */
169 {2, 4, 2}, /* cost of storing integer registers */
170 2, /* cost of reg,reg fld/fst */
171 {8, 8, 8}, /* cost of loading fp registers
172 in SFmode, DFmode and XFmode */
173 {8, 8, 8}, /* cost of loading integer registers */
174 2, /* cost of moving MMX register */
175 {4, 8}, /* cost of loading MMX registers
176 in SImode and DImode */
177 {4, 8}, /* cost of storing MMX registers
178 in SImode and DImode */
179 2, /* cost of moving SSE register */
180 {4, 8, 16}, /* cost of loading SSE registers
181 in SImode, DImode and TImode */
182 {4, 8, 16}, /* cost of storing SSE registers
183 in SImode, DImode and TImode */
184 3, /* MMX or SSE register to integer */
185 0, /* size of prefetch block */
186 0, /* number of parallel prefetches */
188 8, /* cost of FADD and FSUB insns. */
189 16, /* cost of FMUL instruction. */
190 73, /* cost of FDIV instruction. */
191 3, /* cost of FABS instruction. */
192 3, /* cost of FCHS instruction. */
193 83, /* cost of FSQRT instruction. */
197 struct processor_costs pentium_cost = {
198 1, /* cost of an add instruction */
199 1, /* cost of a lea instruction */
200 4, /* variable shift costs */
201 1, /* constant shift costs */
202 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
203 0, /* cost of multiply per each bit set */
204 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
205 3, /* cost of movsx */
206 2, /* cost of movzx */
207 8, /* "large" insn */
209 6, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {2, 2, 6}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {4, 4, 6}, /* cost of loading integer registers */
218 8, /* cost of moving MMX register */
219 {8, 8}, /* cost of loading MMX registers
220 in SImode and DImode */
221 {8, 8}, /* cost of storing MMX registers
222 in SImode and DImode */
223 2, /* cost of moving SSE register */
224 {4, 8, 16}, /* cost of loading SSE registers
225 in SImode, DImode and TImode */
226 {4, 8, 16}, /* cost of storing SSE registers
227 in SImode, DImode and TImode */
228 3, /* MMX or SSE register to integer */
229 0, /* size of prefetch block */
230 0, /* number of parallel prefetches */
232 3, /* cost of FADD and FSUB insns. */
233 3, /* cost of FMUL instruction. */
234 39, /* cost of FDIV instruction. */
235 1, /* cost of FABS instruction. */
236 1, /* cost of FCHS instruction. */
237 70, /* cost of FSQRT instruction. */
241 struct processor_costs pentiumpro_cost = {
242 1, /* cost of an add instruction */
243 1, /* cost of a lea instruction */
244 1, /* variable shift costs */
245 1, /* constant shift costs */
246 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
247 0, /* cost of multiply per each bit set */
248 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
249 1, /* cost of movsx */
250 1, /* cost of movzx */
251 8, /* "large" insn */
253 2, /* cost for loading QImode using movzbl */
254 {4, 4, 4}, /* cost of loading integer registers
255 in QImode, HImode and SImode.
256 Relative to reg-reg move (2). */
257 {2, 2, 2}, /* cost of storing integer registers */
258 2, /* cost of reg,reg fld/fst */
259 {2, 2, 6}, /* cost of loading fp registers
260 in SFmode, DFmode and XFmode */
261 {4, 4, 6}, /* cost of loading integer registers */
262 2, /* cost of moving MMX register */
263 {2, 2}, /* cost of loading MMX registers
264 in SImode and DImode */
265 {2, 2}, /* cost of storing MMX registers
266 in SImode and DImode */
267 2, /* cost of moving SSE register */
268 {2, 2, 8}, /* cost of loading SSE registers
269 in SImode, DImode and TImode */
270 {2, 2, 8}, /* cost of storing SSE registers
271 in SImode, DImode and TImode */
272 3, /* MMX or SSE register to integer */
273 32, /* size of prefetch block */
274 6, /* number of parallel prefetches */
276 3, /* cost of FADD and FSUB insns. */
277 5, /* cost of FMUL instruction. */
278 56, /* cost of FDIV instruction. */
279 2, /* cost of FABS instruction. */
280 2, /* cost of FCHS instruction. */
281 56, /* cost of FSQRT instruction. */
285 struct processor_costs k6_cost = {
286 1, /* cost of an add instruction */
287 2, /* cost of a lea instruction */
288 1, /* variable shift costs */
289 1, /* constant shift costs */
290 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
291 0, /* cost of multiply per each bit set */
292 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
293 2, /* cost of movsx */
294 2, /* cost of movzx */
295 8, /* "large" insn */
297 3, /* cost for loading QImode using movzbl */
298 {4, 5, 4}, /* cost of loading integer registers
299 in QImode, HImode and SImode.
300 Relative to reg-reg move (2). */
301 {2, 3, 2}, /* cost of storing integer registers */
302 4, /* cost of reg,reg fld/fst */
303 {6, 6, 6}, /* cost of loading fp registers
304 in SFmode, DFmode and XFmode */
305 {4, 4, 4}, /* cost of loading integer registers */
306 2, /* cost of moving MMX register */
307 {2, 2}, /* cost of loading MMX registers
308 in SImode and DImode */
309 {2, 2}, /* cost of storing MMX registers
310 in SImode and DImode */
311 2, /* cost of moving SSE register */
312 {2, 2, 8}, /* cost of loading SSE registers
313 in SImode, DImode and TImode */
314 {2, 2, 8}, /* cost of storing SSE registers
315 in SImode, DImode and TImode */
316 6, /* MMX or SSE register to integer */
317 32, /* size of prefetch block */
318 1, /* number of parallel prefetches */
320 2, /* cost of FADD and FSUB insns. */
321 2, /* cost of FMUL instruction. */
322 56, /* cost of FDIV instruction. */
323 2, /* cost of FABS instruction. */
324 2, /* cost of FCHS instruction. */
325 56, /* cost of FSQRT instruction. */
329 struct processor_costs athlon_cost = {
330 1, /* cost of an add instruction */
331 2, /* cost of a lea instruction */
332 1, /* variable shift costs */
333 1, /* constant shift costs */
334 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
335 0, /* cost of multiply per each bit set */
336 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
337 1, /* cost of movsx */
338 1, /* cost of movzx */
339 8, /* "large" insn */
341 4, /* cost for loading QImode using movzbl */
342 {3, 4, 3}, /* cost of loading integer registers
343 in QImode, HImode and SImode.
344 Relative to reg-reg move (2). */
345 {3, 4, 3}, /* cost of storing integer registers */
346 4, /* cost of reg,reg fld/fst */
347 {4, 4, 12}, /* cost of loading fp registers
348 in SFmode, DFmode and XFmode */
349 {6, 6, 8}, /* cost of loading integer registers */
350 2, /* cost of moving MMX register */
351 {4, 4}, /* cost of loading MMX registers
352 in SImode and DImode */
353 {4, 4}, /* cost of storing MMX registers
354 in SImode and DImode */
355 2, /* cost of moving SSE register */
356 {4, 4, 6}, /* cost of loading SSE registers
357 in SImode, DImode and TImode */
358 {4, 4, 5}, /* cost of storing SSE registers
359 in SImode, DImode and TImode */
360 5, /* MMX or SSE register to integer */
361 64, /* size of prefetch block */
362 6, /* number of parallel prefetches */
364 4, /* cost of FADD and FSUB insns. */
365 4, /* cost of FMUL instruction. */
366 24, /* cost of FDIV instruction. */
367 2, /* cost of FABS instruction. */
368 2, /* cost of FCHS instruction. */
369 35, /* cost of FSQRT instruction. */
373 struct processor_costs k8_cost = {
374 1, /* cost of an add instruction */
375 2, /* cost of a lea instruction */
376 1, /* variable shift costs */
377 1, /* constant shift costs */
378 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
379 0, /* cost of multiply per each bit set */
380 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
381 1, /* cost of movsx */
382 1, /* cost of movzx */
383 8, /* "large" insn */
385 4, /* cost for loading QImode using movzbl */
386 {3, 4, 3}, /* cost of loading integer registers
387 in QImode, HImode and SImode.
388 Relative to reg-reg move (2). */
389 {3, 4, 3}, /* cost of storing integer registers */
390 4, /* cost of reg,reg fld/fst */
391 {4, 4, 12}, /* cost of loading fp registers
392 in SFmode, DFmode and XFmode */
393 {6, 6, 8}, /* cost of loading integer registers */
394 2, /* cost of moving MMX register */
395 {3, 3}, /* cost of loading MMX registers
396 in SImode and DImode */
397 {4, 4}, /* cost of storing MMX registers
398 in SImode and DImode */
399 2, /* cost of moving SSE register */
400 {4, 3, 6}, /* cost of loading SSE registers
401 in SImode, DImode and TImode */
402 {4, 4, 5}, /* cost of storing SSE registers
403 in SImode, DImode and TImode */
404 5, /* MMX or SSE register to integer */
405 64, /* size of prefetch block */
406 6, /* number of parallel prefetches */
408 4, /* cost of FADD and FSUB insns. */
409 4, /* cost of FMUL instruction. */
410 19, /* cost of FDIV instruction. */
411 2, /* cost of FABS instruction. */
412 2, /* cost of FCHS instruction. */
413 35, /* cost of FSQRT instruction. */
417 struct processor_costs pentium4_cost = {
418 1, /* cost of an add instruction */
419 1, /* cost of a lea instruction */
420 4, /* variable shift costs */
421 4, /* constant shift costs */
422 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
423 0, /* cost of multiply per each bit set */
424 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
425 1, /* cost of movsx */
426 1, /* cost of movzx */
427 16, /* "large" insn */
429 2, /* cost for loading QImode using movzbl */
430 {4, 5, 4}, /* cost of loading integer registers
431 in QImode, HImode and SImode.
432 Relative to reg-reg move (2). */
433 {2, 3, 2}, /* cost of storing integer registers */
434 2, /* cost of reg,reg fld/fst */
435 {2, 2, 6}, /* cost of loading fp registers
436 in SFmode, DFmode and XFmode */
437 {4, 4, 6}, /* cost of loading integer registers */
438 2, /* cost of moving MMX register */
439 {2, 2}, /* cost of loading MMX registers
440 in SImode and DImode */
441 {2, 2}, /* cost of storing MMX registers
442 in SImode and DImode */
443 12, /* cost of moving SSE register */
444 {12, 12, 12}, /* cost of loading SSE registers
445 in SImode, DImode and TImode */
446 {2, 2, 8}, /* cost of storing SSE registers
447 in SImode, DImode and TImode */
448 10, /* MMX or SSE register to integer */
449 64, /* size of prefetch block */
450 6, /* number of parallel prefetches */
452 5, /* cost of FADD and FSUB insns. */
453 7, /* cost of FMUL instruction. */
454 43, /* cost of FDIV instruction. */
455 2, /* cost of FABS instruction. */
456 2, /* cost of FCHS instruction. */
457 43, /* cost of FSQRT instruction. */
460 const struct processor_costs *ix86_cost = &pentium_cost;
462 /* Processor feature/optimization bitmasks. */
463 #define m_386 (1<<PROCESSOR_I386)
464 #define m_486 (1<<PROCESSOR_I486)
465 #define m_PENT (1<<PROCESSOR_PENTIUM)
466 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
467 #define m_K6 (1<<PROCESSOR_K6)
468 #define m_ATHLON (1<<PROCESSOR_ATHLON)
469 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
470 #define m_K8 (1<<PROCESSOR_K8)
471 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
473 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
474 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
475 const int x86_zero_extend_with_and = m_486 | m_PENT;
476 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
477 const int x86_double_with_add = ~m_386;
478 const int x86_use_bit_test = m_386;
479 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
480 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
481 const int x86_3dnow_a = m_ATHLON_K8;
482 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
483 const int x86_branch_hints = m_PENT4;
484 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
485 const int x86_partial_reg_stall = m_PPRO;
486 const int x86_use_loop = m_K6;
487 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
488 const int x86_use_mov0 = m_K6;
489 const int x86_use_cltd = ~(m_PENT | m_K6);
490 const int x86_read_modify_write = ~m_PENT;
491 const int x86_read_modify = ~(m_PENT | m_PPRO);
492 const int x86_split_long_moves = m_PPRO;
493 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
494 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
495 const int x86_single_stringop = m_386 | m_PENT4;
496 const int x86_qimode_math = ~(0);
497 const int x86_promote_qi_regs = 0;
498 const int x86_himode_math = ~(m_PPRO);
499 const int x86_promote_hi_regs = m_PPRO;
500 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
501 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
502 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
503 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
504 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
505 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
506 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
507 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
508 const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
509 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
510 const int x86_decompose_lea = m_PENT4;
511 const int x86_shift1 = ~m_486;
512 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
513 const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
514 /* Set for machines where the type and dependencies are resolved on SSE register
515 parts instead of whole registers, so we may maintain just lower part of
516 scalar values in proper format leaving the upper part undefined. */
517 const int x86_sse_partial_regs = m_ATHLON_K8;
518 /* Athlon optimizes partial-register FPS special case, thus avoiding the
519 need for extra instructions beforehand */
520 const int x86_sse_partial_regs_for_cvtsd2ss = 0;
521 const int x86_sse_typeless_stores = m_ATHLON_K8;
522 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
523 const int x86_use_ffreep = m_ATHLON_K8;
524 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
525 const int x86_inter_unit_moves = ~(m_ATHLON_K8);
526 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_PPRO;
528 /* In case the average insn count for single function invocation is
529 lower than this constant, emit fast (but longer) prologue and
531 #define FAST_PROLOGUE_INSN_COUNT 20
533 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
534 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
535 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
536 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
538 /* Array of the smallest class containing reg number REGNO, indexed by
539 REGNO. Used by REGNO_REG_CLASS in i386.h. */
541 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
544 AREG, DREG, CREG, BREG,
546 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
548 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
549 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
552 /* flags, fpsr, dirflag, frame */
553 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
554 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
556 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
558 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
559 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
560 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
564 /* The "default" register map used in 32bit mode. */
566 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
568 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
569 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
570 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
571 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
572 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
573 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
574 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
577 static int const x86_64_int_parameter_registers[6] =
579 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
580 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
583 static int const x86_64_int_return_registers[4] =
585 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
588 /* The "default" register map used in 64bit mode. */
589 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
591 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
592 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
593 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
594 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
595 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
596 8,9,10,11,12,13,14,15, /* extended integer registers */
597 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
600 /* Define the register numbers to be used in Dwarf debugging information.
601 The SVR4 reference port C compiler uses the following register numbers
602 in its Dwarf output code:
603 0 for %eax (gcc regno = 0)
604 1 for %ecx (gcc regno = 2)
605 2 for %edx (gcc regno = 1)
606 3 for %ebx (gcc regno = 3)
607 4 for %esp (gcc regno = 7)
608 5 for %ebp (gcc regno = 6)
609 6 for %esi (gcc regno = 4)
610 7 for %edi (gcc regno = 5)
611 The following three DWARF register numbers are never generated by
612 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
613 believes these numbers have these meanings.
614 8 for %eip (no gcc equivalent)
615 9 for %eflags (gcc regno = 17)
616 10 for %trapno (no gcc equivalent)
617 It is not at all clear how we should number the FP stack registers
618 for the x86 architecture. If the version of SDB on x86/svr4 were
619 a bit less brain dead with respect to floating-point then we would
620 have a precedent to follow with respect to DWARF register numbers
621 for x86 FP registers, but the SDB on x86/svr4 is so completely
622 broken with respect to FP registers that it is hardly worth thinking
623 of it as something to strive for compatibility with.
624 The version of x86/svr4 SDB I have at the moment does (partially)
625 seem to believe that DWARF register number 11 is associated with
626 the x86 register %st(0), but that's about all. Higher DWARF
627 register numbers don't seem to be associated with anything in
628 particular, and even for DWARF regno 11, SDB only seems to under-
629 stand that it should say that a variable lives in %st(0) (when
630 asked via an `=' command) if we said it was in DWARF regno 11,
631 but SDB still prints garbage when asked for the value of the
632 variable in question (via a `/' command).
633 (Also note that the labels SDB prints for various FP stack regs
634 when doing an `x' command are all wrong.)
635 Note that these problems generally don't affect the native SVR4
636 C compiler because it doesn't allow the use of -O with -g and
637 because when it is *not* optimizing, it allocates a memory
638 location for each floating-point variable, and the memory
639 location is what gets described in the DWARF AT_location
640 attribute for the variable in question.
641 Regardless of the severe mental illness of the x86/svr4 SDB, we
642 do something sensible here and we use the following DWARF
643 register numbers. Note that these are all stack-top-relative
645 11 for %st(0) (gcc regno = 8)
646 12 for %st(1) (gcc regno = 9)
647 13 for %st(2) (gcc regno = 10)
648 14 for %st(3) (gcc regno = 11)
649 15 for %st(4) (gcc regno = 12)
650 16 for %st(5) (gcc regno = 13)
651 17 for %st(6) (gcc regno = 14)
652 18 for %st(7) (gcc regno = 15)
654 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
656 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
657 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
658 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
659 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
660 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
661 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
662 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
665 /* Test and compare insns in i386.md store the information needed to
666 generate branch and scc insns here. */
668 rtx ix86_compare_op0 = NULL_RTX;
669 rtx ix86_compare_op1 = NULL_RTX;
671 #define MAX_386_STACK_LOCALS 3
672 /* Size of the register save area. */
673 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
675 /* Define the structure for the machine field in struct function. */
677 struct stack_local_entry GTY(())
682 struct stack_local_entry *next;
685 /* Structure describing stack frame layout.
686 Stack grows downward:
692 saved frame pointer if frame_pointer_needed
693 <- HARD_FRAME_POINTER
699 > to_allocate <- FRAME_POINTER
711 int outgoing_arguments_size;
714 HOST_WIDE_INT to_allocate;
715 /* The offsets relative to ARG_POINTER. */
716 HOST_WIDE_INT frame_pointer_offset;
717 HOST_WIDE_INT hard_frame_pointer_offset;
718 HOST_WIDE_INT stack_pointer_offset;
720 /* When save_regs_using_mov is set, emit prologue using
721 move instead of push instructions. */
722 bool save_regs_using_mov;
725 /* Used to enable/disable debugging features. */
726 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
727 /* Code model option as passed by user. */
728 const char *ix86_cmodel_string;
730 enum cmodel ix86_cmodel;
732 const char *ix86_asm_string;
733 enum asm_dialect ix86_asm_dialect = ASM_ATT;
735 const char *ix86_tls_dialect_string;
736 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
738 /* Which unit we are generating floating point math for. */
739 enum fpmath_unit ix86_fpmath;
741 /* Which cpu are we scheduling for. */
742 enum processor_type ix86_tune;
743 /* Which instruction set architecture to use. */
744 enum processor_type ix86_arch;
746 /* Strings to hold which cpu and instruction set architecture to use. */
747 const char *ix86_tune_string; /* for -mtune=<xxx> */
748 const char *ix86_arch_string; /* for -march=<xxx> */
749 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
751 /* # of registers to use to pass arguments. */
752 const char *ix86_regparm_string;
754 /* true if sse prefetch instruction is not NOOP. */
755 int x86_prefetch_sse;
757 /* ix86_regparm_string as a number */
760 /* Alignment to use for loops and jumps: */
762 /* Power of two alignment for loops. */
763 const char *ix86_align_loops_string;
765 /* Power of two alignment for non-loop jumps. */
766 const char *ix86_align_jumps_string;
768 /* Power of two alignment for stack boundary in bytes. */
769 const char *ix86_preferred_stack_boundary_string;
771 /* Preferred alignment for stack boundary in bits. */
772 int ix86_preferred_stack_boundary;
774 /* Values 1-5: see jump.c */
775 int ix86_branch_cost;
776 const char *ix86_branch_cost_string;
778 /* Power of two alignment for functions. */
779 const char *ix86_align_funcs_string;
781 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
782 static char internal_label_prefix[16];
783 static int internal_label_prefix_len;
785 static int local_symbolic_operand (rtx, enum machine_mode);
786 static int tls_symbolic_operand_1 (rtx, enum tls_model);
787 static void output_pic_addr_const (FILE *, rtx, int);
788 static void put_condition_code (enum rtx_code, enum machine_mode,
790 static const char *get_some_local_dynamic_name (void);
791 static int get_some_local_dynamic_name_1 (rtx *, void *);
792 static rtx maybe_get_pool_constant (rtx);
793 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
794 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
796 static rtx get_thread_pointer (int);
797 static rtx legitimize_tls_address (rtx, enum tls_model, int);
798 static void get_pc_thunk_name (char [32], unsigned int);
799 static rtx gen_push (rtx);
800 static int memory_address_length (rtx addr);
801 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
802 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
803 static enum attr_ppro_uops ix86_safe_ppro_uops (rtx);
804 static void ix86_dump_ppro_packet (FILE *);
805 static void ix86_reorder_insn (rtx *, rtx *);
806 static struct machine_function * ix86_init_machine_status (void);
807 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
808 static int ix86_nsaved_regs (void);
809 static void ix86_emit_save_regs (void);
810 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
811 static void ix86_emit_restore_regs_using_mov (rtx, int, int);
812 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
813 static void ix86_set_move_mem_attrs_1 (rtx, rtx, rtx, rtx, rtx);
814 static void ix86_sched_reorder_ppro (rtx *, rtx *);
815 static HOST_WIDE_INT ix86_GOT_alias_set (void);
816 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
817 static rtx ix86_expand_aligntest (rtx, int);
818 static void ix86_expand_strlensi_unroll_1 (rtx, rtx);
819 static int ix86_issue_rate (void);
820 static int ix86_adjust_cost (rtx, rtx, rtx, int);
821 static void ix86_sched_init (FILE *, int, int);
822 static int ix86_sched_reorder (FILE *, int, rtx *, int *, int);
823 static int ix86_variable_issue (FILE *, int, rtx, int);
824 static int ia32_use_dfa_pipeline_interface (void);
825 static int ia32_multipass_dfa_lookahead (void);
826 static void ix86_init_mmx_sse_builtins (void);
827 static rtx x86_this_parameter (tree);
828 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
829 HOST_WIDE_INT, tree);
830 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
831 static void x86_file_start (void);
832 static void ix86_reorg (void);
833 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
834 static tree ix86_build_builtin_va_list (void);
838 rtx base, index, disp;
840 enum ix86_address_seg { SEG_DEFAULT, SEG_FS, SEG_GS } seg;
843 static int ix86_decompose_address (rtx, struct ix86_address *);
844 static int ix86_address_cost (rtx);
845 static bool ix86_cannot_force_const_mem (rtx);
846 static rtx ix86_delegitimize_address (rtx);
848 struct builtin_description;
849 static rtx ix86_expand_sse_comi (const struct builtin_description *,
851 static rtx ix86_expand_sse_compare (const struct builtin_description *,
853 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
854 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
855 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
856 static rtx ix86_expand_store_builtin (enum insn_code, tree);
857 static rtx safe_vector_operand (rtx, enum machine_mode);
858 static enum rtx_code ix86_fp_compare_code_to_integer (enum rtx_code);
859 static void ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *,
860 enum rtx_code *, enum rtx_code *);
861 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
862 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
863 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
864 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
865 static int ix86_fp_comparison_cost (enum rtx_code code);
866 static unsigned int ix86_select_alt_pic_regnum (void);
867 static int ix86_save_reg (unsigned int, int);
868 static void ix86_compute_frame_layout (struct ix86_frame *);
869 static int ix86_comp_type_attributes (tree, tree);
870 static int ix86_function_regparm (tree, tree);
871 const struct attribute_spec ix86_attribute_table[];
872 static bool ix86_function_ok_for_sibcall (tree, tree);
873 static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
874 static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
875 static int ix86_value_regno (enum machine_mode);
876 static bool contains_128bit_aligned_vector_p (tree);
877 static bool ix86_ms_bitfield_layout_p (tree);
878 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
879 static int extended_reg_mentioned_1 (rtx *, void *);
880 static bool ix86_rtx_costs (rtx, int, int, int *);
881 static int min_insn_size (rtx);
882 static void k8_avoid_jump_misspredicts (void);
884 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
885 static void ix86_svr3_asm_out_constructor (rtx, int);
888 /* Register class used for passing given 64bit part of the argument.
889 These represent classes as documented by the PS ABI, with the exception
890 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
891 use SF or DFmode move instead of DImode to avoid reformatting penalties.
893 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
894 whenever possible (upper half does contain padding).
896 enum x86_64_reg_class
899 X86_64_INTEGER_CLASS,
900 X86_64_INTEGERSI_CLASS,
909 static const char * const x86_64_reg_class_name[] =
910 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
912 #define MAX_CLASSES 4
913 static int classify_argument (enum machine_mode, tree,
914 enum x86_64_reg_class [MAX_CLASSES], int);
915 static int examine_argument (enum machine_mode, tree, int, int *, int *);
916 static rtx construct_container (enum machine_mode, tree, int, int, int,
918 static enum x86_64_reg_class merge_classes (enum x86_64_reg_class,
919 enum x86_64_reg_class);
921 /* Table of constants used by fldpi, fldln2, etc... */
922 static REAL_VALUE_TYPE ext_80387_constants_table [5];
923 static bool ext_80387_constants_init = 0;
924 static void init_ext_80387_constants (void);
926 /* Initialize the GCC target structure. */
927 #undef TARGET_ATTRIBUTE_TABLE
928 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
929 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
930 # undef TARGET_MERGE_DECL_ATTRIBUTES
931 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
934 #undef TARGET_COMP_TYPE_ATTRIBUTES
935 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
937 #undef TARGET_INIT_BUILTINS
938 #define TARGET_INIT_BUILTINS ix86_init_builtins
940 #undef TARGET_EXPAND_BUILTIN
941 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
943 #undef TARGET_ASM_FUNCTION_EPILOGUE
944 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
946 #undef TARGET_ASM_OPEN_PAREN
947 #define TARGET_ASM_OPEN_PAREN ""
948 #undef TARGET_ASM_CLOSE_PAREN
949 #define TARGET_ASM_CLOSE_PAREN ""
951 #undef TARGET_ASM_ALIGNED_HI_OP
952 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
953 #undef TARGET_ASM_ALIGNED_SI_OP
954 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
956 #undef TARGET_ASM_ALIGNED_DI_OP
957 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
960 #undef TARGET_ASM_UNALIGNED_HI_OP
961 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
962 #undef TARGET_ASM_UNALIGNED_SI_OP
963 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
964 #undef TARGET_ASM_UNALIGNED_DI_OP
965 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
967 #undef TARGET_SCHED_ADJUST_COST
968 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
969 #undef TARGET_SCHED_ISSUE_RATE
970 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
971 #undef TARGET_SCHED_VARIABLE_ISSUE
972 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
973 #undef TARGET_SCHED_INIT
974 #define TARGET_SCHED_INIT ix86_sched_init
975 #undef TARGET_SCHED_REORDER
976 #define TARGET_SCHED_REORDER ix86_sched_reorder
977 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
978 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
979 ia32_use_dfa_pipeline_interface
980 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
981 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
982 ia32_multipass_dfa_lookahead
984 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
985 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
988 #undef TARGET_HAVE_TLS
989 #define TARGET_HAVE_TLS true
991 #undef TARGET_CANNOT_FORCE_CONST_MEM
992 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
994 #undef TARGET_DELEGITIMIZE_ADDRESS
995 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
997 #undef TARGET_MS_BITFIELD_LAYOUT_P
998 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1000 #undef TARGET_ASM_OUTPUT_MI_THUNK
1001 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1002 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1003 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1005 #undef TARGET_ASM_FILE_START
1006 #define TARGET_ASM_FILE_START x86_file_start
1008 #undef TARGET_RTX_COSTS
1009 #define TARGET_RTX_COSTS ix86_rtx_costs
1010 #undef TARGET_ADDRESS_COST
1011 #define TARGET_ADDRESS_COST ix86_address_cost
1013 #undef TARGET_MACHINE_DEPENDENT_REORG
1014 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1016 #undef TARGET_BUILD_BUILTIN_VA_LIST
1017 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1019 struct gcc_target targetm = TARGET_INITIALIZER;
1021 /* The svr4 ABI for the i386 says that records and unions are returned
1023 #ifndef DEFAULT_PCC_STRUCT_RETURN
1024 #define DEFAULT_PCC_STRUCT_RETURN 1
1027 /* Sometimes certain combinations of command options do not make
1028 sense on a particular target machine. You can define a macro
1029 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1030 defined, is executed once just after all the command options have
1033 Don't use this macro to turn on various extra optimizations for
1034 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1037 override_options (void)
1040 /* Comes from final.c -- no real reason to change it. */
1041 #define MAX_CODE_ALIGN 16
1045 const struct processor_costs *cost; /* Processor costs */
1046 const int target_enable; /* Target flags to enable. */
1047 const int target_disable; /* Target flags to disable. */
1048 const int align_loop; /* Default alignments. */
1049 const int align_loop_max_skip;
1050 const int align_jump;
1051 const int align_jump_max_skip;
1052 const int align_func;
1054 const processor_target_table[PROCESSOR_max] =
1056 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1057 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1058 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1059 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1060 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1061 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1062 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1063 {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
1066 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1069 const char *const name; /* processor name or nickname. */
1070 const enum processor_type processor;
1071 const enum pta_flags
1076 PTA_PREFETCH_SSE = 8,
1082 const processor_alias_table[] =
1084 {"i386", PROCESSOR_I386, 0},
1085 {"i486", PROCESSOR_I486, 0},
1086 {"i586", PROCESSOR_PENTIUM, 0},
1087 {"pentium", PROCESSOR_PENTIUM, 0},
1088 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1089 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1090 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1091 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1092 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1093 {"i686", PROCESSOR_PENTIUMPRO, 0},
1094 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1095 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1096 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1097 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
1098 PTA_MMX | PTA_PREFETCH_SSE},
1099 {"k6", PROCESSOR_K6, PTA_MMX},
1100 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1101 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1102 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1104 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1105 | PTA_3DNOW | PTA_3DNOW_A},
1106 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1107 | PTA_3DNOW_A | PTA_SSE},
1108 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1109 | PTA_3DNOW_A | PTA_SSE},
1110 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1111 | PTA_3DNOW_A | PTA_SSE},
1112 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1113 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1116 int const pta_size = ARRAY_SIZE (processor_alias_table);
1118 /* Set the default values for switches whose default depends on TARGET_64BIT
1119 in case they weren't overwritten by command line options. */
1122 if (flag_omit_frame_pointer == 2)
1123 flag_omit_frame_pointer = 1;
1124 if (flag_asynchronous_unwind_tables == 2)
1125 flag_asynchronous_unwind_tables = 1;
1126 if (flag_pcc_struct_return == 2)
1127 flag_pcc_struct_return = 0;
1131 if (flag_omit_frame_pointer == 2)
1132 flag_omit_frame_pointer = 0;
1133 if (flag_asynchronous_unwind_tables == 2)
1134 flag_asynchronous_unwind_tables = 0;
1135 if (flag_pcc_struct_return == 2)
1136 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1139 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1140 SUBTARGET_OVERRIDE_OPTIONS;
1143 if (!ix86_tune_string && ix86_arch_string)
1144 ix86_tune_string = ix86_arch_string;
1145 if (!ix86_tune_string)
1146 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1147 if (!ix86_arch_string)
1148 ix86_arch_string = TARGET_64BIT ? "k8" : "i386";
1150 if (ix86_cmodel_string != 0)
1152 if (!strcmp (ix86_cmodel_string, "small"))
1153 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1155 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1156 else if (!strcmp (ix86_cmodel_string, "32"))
1157 ix86_cmodel = CM_32;
1158 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1159 ix86_cmodel = CM_KERNEL;
1160 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1161 ix86_cmodel = CM_MEDIUM;
1162 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1163 ix86_cmodel = CM_LARGE;
1165 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1169 ix86_cmodel = CM_32;
1171 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1173 if (ix86_asm_string != 0)
1175 if (!strcmp (ix86_asm_string, "intel"))
1176 ix86_asm_dialect = ASM_INTEL;
1177 else if (!strcmp (ix86_asm_string, "att"))
1178 ix86_asm_dialect = ASM_ATT;
1180 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1182 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1183 error ("code model `%s' not supported in the %s bit mode",
1184 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1185 if (ix86_cmodel == CM_LARGE)
1186 sorry ("code model `large' not supported yet");
1187 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1188 sorry ("%i-bit mode not compiled in",
1189 (target_flags & MASK_64BIT) ? 64 : 32);
1191 for (i = 0; i < pta_size; i++)
1192 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1194 ix86_arch = processor_alias_table[i].processor;
1195 /* Default cpu tuning to the architecture. */
1196 ix86_tune = ix86_arch;
1197 if (processor_alias_table[i].flags & PTA_MMX
1198 && !(target_flags_explicit & MASK_MMX))
1199 target_flags |= MASK_MMX;
1200 if (processor_alias_table[i].flags & PTA_3DNOW
1201 && !(target_flags_explicit & MASK_3DNOW))
1202 target_flags |= MASK_3DNOW;
1203 if (processor_alias_table[i].flags & PTA_3DNOW_A
1204 && !(target_flags_explicit & MASK_3DNOW_A))
1205 target_flags |= MASK_3DNOW_A;
1206 if (processor_alias_table[i].flags & PTA_SSE
1207 && !(target_flags_explicit & MASK_SSE))
1208 target_flags |= MASK_SSE;
1209 if (processor_alias_table[i].flags & PTA_SSE2
1210 && !(target_flags_explicit & MASK_SSE2))
1211 target_flags |= MASK_SSE2;
1212 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1213 x86_prefetch_sse = true;
1214 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1215 error ("CPU you selected does not support x86-64 instruction set");
1220 error ("bad value (%s) for -march= switch", ix86_arch_string);
1222 for (i = 0; i < pta_size; i++)
1223 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1225 ix86_tune = processor_alias_table[i].processor;
1226 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1227 error ("CPU you selected does not support x86-64 instruction set");
1230 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1231 x86_prefetch_sse = true;
1233 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1236 ix86_cost = &size_cost;
1238 ix86_cost = processor_target_table[ix86_tune].cost;
1239 target_flags |= processor_target_table[ix86_tune].target_enable;
1240 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1242 /* Arrange to set up i386_stack_locals for all functions. */
1243 init_machine_status = ix86_init_machine_status;
1245 /* Validate -mregparm= value. */
1246 if (ix86_regparm_string)
1248 i = atoi (ix86_regparm_string);
1249 if (i < 0 || i > REGPARM_MAX)
1250 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1256 ix86_regparm = REGPARM_MAX;
1258 /* If the user has provided any of the -malign-* options,
1259 warn and use that value only if -falign-* is not set.
1260 Remove this code in GCC 3.2 or later. */
1261 if (ix86_align_loops_string)
1263 warning ("-malign-loops is obsolete, use -falign-loops");
1264 if (align_loops == 0)
1266 i = atoi (ix86_align_loops_string);
1267 if (i < 0 || i > MAX_CODE_ALIGN)
1268 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1270 align_loops = 1 << i;
1274 if (ix86_align_jumps_string)
1276 warning ("-malign-jumps is obsolete, use -falign-jumps");
1277 if (align_jumps == 0)
1279 i = atoi (ix86_align_jumps_string);
1280 if (i < 0 || i > MAX_CODE_ALIGN)
1281 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1283 align_jumps = 1 << i;
1287 if (ix86_align_funcs_string)
1289 warning ("-malign-functions is obsolete, use -falign-functions");
1290 if (align_functions == 0)
1292 i = atoi (ix86_align_funcs_string);
1293 if (i < 0 || i > MAX_CODE_ALIGN)
1294 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1296 align_functions = 1 << i;
1300 /* Default align_* from the processor table. */
1301 if (align_loops == 0)
1303 align_loops = processor_target_table[ix86_tune].align_loop;
1304 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1306 if (align_jumps == 0)
1308 align_jumps = processor_target_table[ix86_tune].align_jump;
1309 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1311 if (align_functions == 0)
1313 align_functions = processor_target_table[ix86_tune].align_func;
1316 /* Validate -mpreferred-stack-boundary= value, or provide default.
1317 The default of 128 bits is for Pentium III's SSE __m128, but we
1318 don't want additional code to keep the stack aligned when
1319 optimizing for code size. */
1320 ix86_preferred_stack_boundary = (optimize_size
1321 ? TARGET_64BIT ? 128 : 32
1323 if (ix86_preferred_stack_boundary_string)
1325 i = atoi (ix86_preferred_stack_boundary_string);
1326 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1327 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1328 TARGET_64BIT ? 4 : 2);
1330 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1333 /* Validate -mbranch-cost= value, or provide default. */
1334 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1335 if (ix86_branch_cost_string)
1337 i = atoi (ix86_branch_cost_string);
1339 error ("-mbranch-cost=%d is not between 0 and 5", i);
1341 ix86_branch_cost = i;
1344 if (ix86_tls_dialect_string)
1346 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1347 ix86_tls_dialect = TLS_DIALECT_GNU;
1348 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1349 ix86_tls_dialect = TLS_DIALECT_SUN;
1351 error ("bad value (%s) for -mtls-dialect= switch",
1352 ix86_tls_dialect_string);
1355 /* Keep nonleaf frame pointers. */
1356 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1357 flag_omit_frame_pointer = 1;
1359 /* If we're doing fast math, we don't care about comparison order
1360 wrt NaNs. This lets us use a shorter comparison sequence. */
1361 if (flag_unsafe_math_optimizations)
1362 target_flags &= ~MASK_IEEE_FP;
1364 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1365 since the insns won't need emulation. */
1366 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1367 target_flags &= ~MASK_NO_FANCY_MATH_387;
1369 /* Turn on SSE2 builtins for -mpni. */
1371 target_flags |= MASK_SSE2;
1373 /* Turn on SSE builtins for -msse2. */
1375 target_flags |= MASK_SSE;
1379 if (TARGET_ALIGN_DOUBLE)
1380 error ("-malign-double makes no sense in the 64bit mode");
1382 error ("-mrtd calling convention not supported in the 64bit mode");
1383 /* Enable by default the SSE and MMX builtins. */
1384 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1385 ix86_fpmath = FPMATH_SSE;
1389 ix86_fpmath = FPMATH_387;
1390 /* i386 ABI does not specify red zone. It still makes sense to use it
1391 when programmer takes care to stack from being destroyed. */
1392 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1393 target_flags |= MASK_NO_RED_ZONE;
1396 if (ix86_fpmath_string != 0)
1398 if (! strcmp (ix86_fpmath_string, "387"))
1399 ix86_fpmath = FPMATH_387;
1400 else if (! strcmp (ix86_fpmath_string, "sse"))
1404 warning ("SSE instruction set disabled, using 387 arithmetics");
1405 ix86_fpmath = FPMATH_387;
1408 ix86_fpmath = FPMATH_SSE;
1410 else if (! strcmp (ix86_fpmath_string, "387,sse")
1411 || ! strcmp (ix86_fpmath_string, "sse,387"))
1415 warning ("SSE instruction set disabled, using 387 arithmetics");
1416 ix86_fpmath = FPMATH_387;
1418 else if (!TARGET_80387)
1420 warning ("387 instruction set disabled, using SSE arithmetics");
1421 ix86_fpmath = FPMATH_SSE;
1424 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1427 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1430 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1434 target_flags |= MASK_MMX;
1435 x86_prefetch_sse = true;
1438 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1441 target_flags |= MASK_MMX;
1442 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1443 extensions it adds. */
1444 if (x86_3dnow_a & (1 << ix86_arch))
1445 target_flags |= MASK_3DNOW_A;
1447 if ((x86_accumulate_outgoing_args & TUNEMASK)
1448 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1450 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1452 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1455 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1456 p = strchr (internal_label_prefix, 'X');
1457 internal_label_prefix_len = p - internal_label_prefix;
1463 optimization_options (int level, int size ATTRIBUTE_UNUSED)
1465 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1466 make the problem with not enough registers even worse. */
1467 #ifdef INSN_SCHEDULING
1469 flag_schedule_insns = 0;
1472 /* The default values of these switches depend on the TARGET_64BIT
1473 that is not known at this moment. Mark these values with 2 and
1474 let user the to override these. In case there is no command line option
1475 specifying them, we will set the defaults in override_options. */
1477 flag_omit_frame_pointer = 2;
1478 flag_pcc_struct_return = 2;
1479 flag_asynchronous_unwind_tables = 2;
1482 /* Table of valid machine attributes. */
1483 const struct attribute_spec ix86_attribute_table[] =
1485 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1486 /* Stdcall attribute says callee is responsible for popping arguments
1487 if they are not variable. */
1488 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1489 /* Fastcall attribute says callee is responsible for popping arguments
1490 if they are not variable. */
1491 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1492 /* Cdecl attribute says the callee is a normal C declaration */
1493 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1494 /* Regparm attribute specifies how many integer arguments are to be
1495 passed in registers. */
1496 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1497 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1498 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1499 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1500 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1502 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1503 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1504 { NULL, 0, 0, false, false, false, NULL }
1507 /* Decide whether we can make a sibling call to a function. DECL is the
1508 declaration of the function being targeted by the call and EXP is the
1509 CALL_EXPR representing the call. */
1512 ix86_function_ok_for_sibcall (tree decl, tree exp)
1514 /* If we are generating position-independent code, we cannot sibcall
1515 optimize any indirect call, or a direct call to a global function,
1516 as the PLT requires %ebx be live. */
1517 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1520 /* If we are returning floats on the 80387 register stack, we cannot
1521 make a sibcall from a function that doesn't return a float to a
1522 function that does or, conversely, from a function that does return
1523 a float to a function that doesn't; the necessary stack adjustment
1524 would not be executed. */
1525 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1526 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1529 /* If this call is indirect, we'll need to be able to use a call-clobbered
1530 register for the address of the target function. Make sure that all
1531 such registers are not used for passing parameters. */
1532 if (!decl && !TARGET_64BIT)
1536 /* We're looking at the CALL_EXPR, we need the type of the function. */
1537 type = TREE_OPERAND (exp, 0); /* pointer expression */
1538 type = TREE_TYPE (type); /* pointer type */
1539 type = TREE_TYPE (type); /* function type */
1541 if (ix86_function_regparm (type, NULL) >= 3)
1543 /* ??? Need to count the actual number of registers to be used,
1544 not the possible number of registers. Fix later. */
1549 /* Otherwise okay. That also includes certain types of indirect calls. */
1553 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1554 arguments as in struct attribute_spec.handler. */
1556 ix86_handle_cdecl_attribute (tree *node, tree name,
1557 tree args ATTRIBUTE_UNUSED,
1558 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1560 if (TREE_CODE (*node) != FUNCTION_TYPE
1561 && TREE_CODE (*node) != METHOD_TYPE
1562 && TREE_CODE (*node) != FIELD_DECL
1563 && TREE_CODE (*node) != TYPE_DECL)
1565 warning ("`%s' attribute only applies to functions",
1566 IDENTIFIER_POINTER (name));
1567 *no_add_attrs = true;
1571 if (is_attribute_p ("fastcall", name))
1573 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1575 error ("fastcall and stdcall attributes are not compatible");
1577 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1579 error ("fastcall and regparm attributes are not compatible");
1582 else if (is_attribute_p ("stdcall", name))
1584 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1586 error ("fastcall and stdcall attributes are not compatible");
1593 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1594 *no_add_attrs = true;
1600 /* Handle a "regparm" attribute;
1601 arguments as in struct attribute_spec.handler. */
1603 ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1604 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1606 if (TREE_CODE (*node) != FUNCTION_TYPE
1607 && TREE_CODE (*node) != METHOD_TYPE
1608 && TREE_CODE (*node) != FIELD_DECL
1609 && TREE_CODE (*node) != TYPE_DECL)
1611 warning ("`%s' attribute only applies to functions",
1612 IDENTIFIER_POINTER (name));
1613 *no_add_attrs = true;
1619 cst = TREE_VALUE (args);
1620 if (TREE_CODE (cst) != INTEGER_CST)
1622 warning ("`%s' attribute requires an integer constant argument",
1623 IDENTIFIER_POINTER (name));
1624 *no_add_attrs = true;
1626 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1628 warning ("argument to `%s' attribute larger than %d",
1629 IDENTIFIER_POINTER (name), REGPARM_MAX);
1630 *no_add_attrs = true;
1633 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1635 error ("fastcall and regparm attributes are not compatible");
1642 /* Return 0 if the attributes for two types are incompatible, 1 if they
1643 are compatible, and 2 if they are nearly compatible (which causes a
1644 warning to be generated). */
1647 ix86_comp_type_attributes (tree type1, tree type2)
1649 /* Check for mismatch of non-default calling convention. */
1650 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1652 if (TREE_CODE (type1) != FUNCTION_TYPE)
1655 /* Check for mismatched fastcall types */
1656 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1657 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1660 /* Check for mismatched return types (cdecl vs stdcall). */
1661 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1662 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1667 /* Return the regparm value for a fuctio with the indicated TYPE and DECL.
1668 DECL may be NULL when calling function indirectly
1669 or considering a libcall. */
1672 ix86_function_regparm (tree type, tree decl)
1675 int regparm = ix86_regparm;
1676 bool user_convention = false;
1680 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1683 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1684 user_convention = true;
1687 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1690 user_convention = true;
1693 /* Use register calling convention for local functions when possible. */
1694 if (!TARGET_64BIT && !user_convention && decl
1695 && flag_unit_at_a_time && !profile_flag)
1697 struct cgraph_local_info *i = cgraph_local_info (decl);
1700 /* We can't use regparm(3) for nested functions as these use
1701 static chain pointer in third argument. */
1702 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1712 /* Return true if EAX is live at the start of the function. Used by
1713 ix86_expand_prologue to determine if we need special help before
1714 calling allocate_stack_worker. */
1717 ix86_eax_live_at_start_p (void)
1719 /* Cheat. Don't bother working forward from ix86_function_regparm
1720 to the function type to whether an actual argument is located in
1721 eax. Instead just look at cfg info, which is still close enough
1722 to correct at this point. This gives false positives for broken
1723 functions that might use uninitialized data that happens to be
1724 allocated in eax, but who cares? */
1725 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1728 /* Value is the number of bytes of arguments automatically
1729 popped when returning from a subroutine call.
1730 FUNDECL is the declaration node of the function (as a tree),
1731 FUNTYPE is the data type of the function (as a tree),
1732 or for a library call it is an identifier node for the subroutine name.
1733 SIZE is the number of bytes of arguments passed on the stack.
1735 On the 80386, the RTD insn may be used to pop them if the number
1736 of args is fixed, but if the number is variable then the caller
1737 must pop them all. RTD can't be used for library calls now
1738 because the library is compiled with the Unix compiler.
1739 Use of RTD is a selectable option, since it is incompatible with
1740 standard Unix calling sequences. If the option is not selected,
1741 the caller must always pop the args.
1743 The attribute stdcall is equivalent to RTD on a per module basis. */
1746 ix86_return_pops_args (tree fundecl, tree funtype, int size)
1748 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1750 /* Cdecl functions override -mrtd, and never pop the stack. */
1751 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1753 /* Stdcall and fastcall functions will pop the stack if not variable args. */
1754 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1755 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1759 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1760 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1761 == void_type_node)))
1765 /* Lose any fake structure return argument if it is passed on the stack. */
1766 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1769 int nregs = ix86_function_regparm (funtype, fundecl);
1772 return GET_MODE_SIZE (Pmode);
1778 /* Argument support functions. */
1780 /* Return true when register may be used to pass function parameters. */
1782 ix86_function_arg_regno_p (int regno)
1786 return (regno < REGPARM_MAX
1787 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1788 if (SSE_REGNO_P (regno) && TARGET_SSE)
1790 /* RAX is used as hidden argument to va_arg functions. */
1793 for (i = 0; i < REGPARM_MAX; i++)
1794 if (regno == x86_64_int_parameter_registers[i])
1799 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1800 for a call to a function whose data type is FNTYPE.
1801 For a library call, FNTYPE is 0. */
1804 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1805 tree fntype, /* tree ptr for function decl */
1806 rtx libname, /* SYMBOL_REF of library name or 0 */
1809 static CUMULATIVE_ARGS zero_cum;
1810 tree param, next_param;
1812 if (TARGET_DEBUG_ARG)
1814 fprintf (stderr, "\ninit_cumulative_args (");
1816 fprintf (stderr, "fntype code = %s, ret code = %s",
1817 tree_code_name[(int) TREE_CODE (fntype)],
1818 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1820 fprintf (stderr, "no fntype");
1823 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1828 /* Set up the number of registers to use for passing arguments. */
1830 cum->nregs = ix86_function_regparm (fntype, fndecl);
1832 cum->nregs = ix86_regparm;
1833 cum->sse_nregs = SSE_REGPARM_MAX;
1834 cum->maybe_vaarg = false;
1836 /* Use ecx and edx registers if function has fastcall attribute */
1837 if (fntype && !TARGET_64BIT)
1839 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1847 /* Determine if this function has variable arguments. This is
1848 indicated by the last argument being 'void_type_mode' if there
1849 are no variable arguments. If there are variable arguments, then
1850 we won't pass anything in registers */
1854 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1855 param != 0; param = next_param)
1857 next_param = TREE_CHAIN (param);
1858 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1865 cum->maybe_vaarg = true;
1869 if ((!fntype && !libname)
1870 || (fntype && !TYPE_ARG_TYPES (fntype)))
1871 cum->maybe_vaarg = 1;
1873 if (TARGET_DEBUG_ARG)
1874 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1879 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
1880 of this code is to classify each 8bytes of incoming argument by the register
1881 class and assign registers accordingly. */
1883 /* Return the union class of CLASS1 and CLASS2.
1884 See the x86-64 PS ABI for details. */
1886 static enum x86_64_reg_class
1887 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
1889 /* Rule #1: If both classes are equal, this is the resulting class. */
1890 if (class1 == class2)
1893 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1895 if (class1 == X86_64_NO_CLASS)
1897 if (class2 == X86_64_NO_CLASS)
1900 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1901 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1902 return X86_64_MEMORY_CLASS;
1904 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1905 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1906 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1907 return X86_64_INTEGERSI_CLASS;
1908 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1909 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1910 return X86_64_INTEGER_CLASS;
1912 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1913 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1914 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1915 return X86_64_MEMORY_CLASS;
1917 /* Rule #6: Otherwise class SSE is used. */
1918 return X86_64_SSE_CLASS;
1921 /* Classify the argument of type TYPE and mode MODE.
1922 CLASSES will be filled by the register class used to pass each word
1923 of the operand. The number of words is returned. In case the parameter
1924 should be passed in memory, 0 is returned. As a special case for zero
1925 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1927 BIT_OFFSET is used internally for handling records and specifies offset
1928 of the offset in bits modulo 256 to avoid overflow cases.
1930 See the x86-64 PS ABI for details.
1934 classify_argument (enum machine_mode mode, tree type,
1935 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
1937 HOST_WIDE_INT bytes =
1938 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1939 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1941 /* Variable sized entities are always passed/returned in memory. */
1945 if (mode != VOIDmode
1946 && MUST_PASS_IN_STACK (mode, type))
1949 if (type && AGGREGATE_TYPE_P (type))
1953 enum x86_64_reg_class subclasses[MAX_CLASSES];
1955 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1959 for (i = 0; i < words; i++)
1960 classes[i] = X86_64_NO_CLASS;
1962 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1963 signalize memory class, so handle it as special case. */
1966 classes[0] = X86_64_NO_CLASS;
1970 /* Classify each field of record and merge classes. */
1971 if (TREE_CODE (type) == RECORD_TYPE)
1973 /* For classes first merge in the field of the subclasses. */
1974 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1976 tree bases = TYPE_BINFO_BASETYPES (type);
1977 int n_bases = TREE_VEC_LENGTH (bases);
1980 for (i = 0; i < n_bases; ++i)
1982 tree binfo = TREE_VEC_ELT (bases, i);
1984 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1985 tree type = BINFO_TYPE (binfo);
1987 num = classify_argument (TYPE_MODE (type),
1989 (offset + bit_offset) % 256);
1992 for (i = 0; i < num; i++)
1994 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1996 merge_classes (subclasses[i], classes[i + pos]);
2000 /* And now merge the fields of structure. */
2001 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2003 if (TREE_CODE (field) == FIELD_DECL)
2007 /* Bitfields are always classified as integer. Handle them
2008 early, since later code would consider them to be
2009 misaligned integers. */
2010 if (DECL_BIT_FIELD (field))
2012 for (i = int_bit_position (field) / 8 / 8;
2013 i < (int_bit_position (field)
2014 + tree_low_cst (DECL_SIZE (field), 0)
2017 merge_classes (X86_64_INTEGER_CLASS,
2022 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2023 TREE_TYPE (field), subclasses,
2024 (int_bit_position (field)
2025 + bit_offset) % 256);
2028 for (i = 0; i < num; i++)
2031 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2033 merge_classes (subclasses[i], classes[i + pos]);
2039 /* Arrays are handled as small records. */
2040 else if (TREE_CODE (type) == ARRAY_TYPE)
2043 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2044 TREE_TYPE (type), subclasses, bit_offset);
2048 /* The partial classes are now full classes. */
2049 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2050 subclasses[0] = X86_64_SSE_CLASS;
2051 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2052 subclasses[0] = X86_64_INTEGER_CLASS;
2054 for (i = 0; i < words; i++)
2055 classes[i] = subclasses[i % num];
2057 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2058 else if (TREE_CODE (type) == UNION_TYPE
2059 || TREE_CODE (type) == QUAL_UNION_TYPE)
2061 /* For classes first merge in the field of the subclasses. */
2062 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2064 tree bases = TYPE_BINFO_BASETYPES (type);
2065 int n_bases = TREE_VEC_LENGTH (bases);
2068 for (i = 0; i < n_bases; ++i)
2070 tree binfo = TREE_VEC_ELT (bases, i);
2072 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2073 tree type = BINFO_TYPE (binfo);
2075 num = classify_argument (TYPE_MODE (type),
2077 (offset + (bit_offset % 64)) % 256);
2080 for (i = 0; i < num; i++)
2082 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2084 merge_classes (subclasses[i], classes[i + pos]);
2088 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2090 if (TREE_CODE (field) == FIELD_DECL)
2093 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2094 TREE_TYPE (field), subclasses,
2098 for (i = 0; i < num; i++)
2099 classes[i] = merge_classes (subclasses[i], classes[i]);
2103 else if (TREE_CODE (type) == SET_TYPE)
2107 classes[0] = X86_64_INTEGERSI_CLASS;
2110 else if (bytes <= 8)
2112 classes[0] = X86_64_INTEGER_CLASS;
2115 else if (bytes <= 12)
2117 classes[0] = X86_64_INTEGER_CLASS;
2118 classes[1] = X86_64_INTEGERSI_CLASS;
2123 classes[0] = X86_64_INTEGER_CLASS;
2124 classes[1] = X86_64_INTEGER_CLASS;
2131 /* Final merger cleanup. */
2132 for (i = 0; i < words; i++)
2134 /* If one class is MEMORY, everything should be passed in
2136 if (classes[i] == X86_64_MEMORY_CLASS)
2139 /* The X86_64_SSEUP_CLASS should be always preceded by
2140 X86_64_SSE_CLASS. */
2141 if (classes[i] == X86_64_SSEUP_CLASS
2142 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2143 classes[i] = X86_64_SSE_CLASS;
2145 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2146 if (classes[i] == X86_64_X87UP_CLASS
2147 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2148 classes[i] = X86_64_SSE_CLASS;
2153 /* Compute alignment needed. We align all types to natural boundaries with
2154 exception of XFmode that is aligned to 64bits. */
2155 if (mode != VOIDmode && mode != BLKmode)
2157 int mode_alignment = GET_MODE_BITSIZE (mode);
2160 mode_alignment = 128;
2161 else if (mode == XCmode)
2162 mode_alignment = 256;
2163 /* Misaligned fields are always returned in memory. */
2164 if (bit_offset % mode_alignment)
2168 /* Classification of atomic types. */
2178 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2179 classes[0] = X86_64_INTEGERSI_CLASS;
2181 classes[0] = X86_64_INTEGER_CLASS;
2185 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2188 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2189 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2192 if (!(bit_offset % 64))
2193 classes[0] = X86_64_SSESF_CLASS;
2195 classes[0] = X86_64_SSE_CLASS;
2198 classes[0] = X86_64_SSEDF_CLASS;
2201 classes[0] = X86_64_X87_CLASS;
2202 classes[1] = X86_64_X87UP_CLASS;
2208 classes[0] = X86_64_X87_CLASS;
2209 classes[1] = X86_64_X87UP_CLASS;
2210 classes[2] = X86_64_X87_CLASS;
2211 classes[3] = X86_64_X87UP_CLASS;
2214 classes[0] = X86_64_SSEDF_CLASS;
2215 classes[1] = X86_64_SSEDF_CLASS;
2218 classes[0] = X86_64_SSE_CLASS;
2226 classes[0] = X86_64_SSE_CLASS;
2227 classes[1] = X86_64_SSEUP_CLASS;
2242 /* Examine the argument and return set number of register required in each
2243 class. Return 0 iff parameter should be passed in memory. */
2245 examine_argument (enum machine_mode mode, tree type, int in_return,
2246 int *int_nregs, int *sse_nregs)
2248 enum x86_64_reg_class class[MAX_CLASSES];
2249 int n = classify_argument (mode, type, class, 0);
2255 for (n--; n >= 0; n--)
2258 case X86_64_INTEGER_CLASS:
2259 case X86_64_INTEGERSI_CLASS:
2262 case X86_64_SSE_CLASS:
2263 case X86_64_SSESF_CLASS:
2264 case X86_64_SSEDF_CLASS:
2267 case X86_64_NO_CLASS:
2268 case X86_64_SSEUP_CLASS:
2270 case X86_64_X87_CLASS:
2271 case X86_64_X87UP_CLASS:
2275 case X86_64_MEMORY_CLASS:
2280 /* Construct container for the argument used by GCC interface. See
2281 FUNCTION_ARG for the detailed description. */
2283 construct_container (enum machine_mode mode, tree type, int in_return,
2284 int nintregs, int nsseregs, const int * intreg,
2287 enum machine_mode tmpmode;
2289 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2290 enum x86_64_reg_class class[MAX_CLASSES];
2294 int needed_sseregs, needed_intregs;
2295 rtx exp[MAX_CLASSES];
2298 n = classify_argument (mode, type, class, 0);
2299 if (TARGET_DEBUG_ARG)
2302 fprintf (stderr, "Memory class\n");
2305 fprintf (stderr, "Classes:");
2306 for (i = 0; i < n; i++)
2308 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2310 fprintf (stderr, "\n");
2315 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2317 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2320 /* First construct simple cases. Avoid SCmode, since we want to use
2321 single register to pass this type. */
2322 if (n == 1 && mode != SCmode)
2325 case X86_64_INTEGER_CLASS:
2326 case X86_64_INTEGERSI_CLASS:
2327 return gen_rtx_REG (mode, intreg[0]);
2328 case X86_64_SSE_CLASS:
2329 case X86_64_SSESF_CLASS:
2330 case X86_64_SSEDF_CLASS:
2331 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2332 case X86_64_X87_CLASS:
2333 return gen_rtx_REG (mode, FIRST_STACK_REG);
2334 case X86_64_NO_CLASS:
2335 /* Zero sized array, struct or class. */
2340 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
2341 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2343 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2344 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2345 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2346 && class[1] == X86_64_INTEGER_CLASS
2347 && (mode == CDImode || mode == TImode || mode == TFmode)
2348 && intreg[0] + 1 == intreg[1])
2349 return gen_rtx_REG (mode, intreg[0]);
2351 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2352 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2353 return gen_rtx_REG (XCmode, FIRST_STACK_REG);
2355 /* Otherwise figure out the entries of the PARALLEL. */
2356 for (i = 0; i < n; i++)
2360 case X86_64_NO_CLASS:
2362 case X86_64_INTEGER_CLASS:
2363 case X86_64_INTEGERSI_CLASS:
2364 /* Merge TImodes on aligned occasions here too. */
2365 if (i * 8 + 8 > bytes)
2366 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2367 else if (class[i] == X86_64_INTEGERSI_CLASS)
2371 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2372 if (tmpmode == BLKmode)
2374 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2375 gen_rtx_REG (tmpmode, *intreg),
2379 case X86_64_SSESF_CLASS:
2380 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2381 gen_rtx_REG (SFmode,
2382 SSE_REGNO (sse_regno)),
2386 case X86_64_SSEDF_CLASS:
2387 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2388 gen_rtx_REG (DFmode,
2389 SSE_REGNO (sse_regno)),
2393 case X86_64_SSE_CLASS:
2394 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2398 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2399 gen_rtx_REG (tmpmode,
2400 SSE_REGNO (sse_regno)),
2402 if (tmpmode == TImode)
2410 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2411 for (i = 0; i < nexps; i++)
2412 XVECEXP (ret, 0, i) = exp [i];
2416 /* Update the data in CUM to advance over an argument
2417 of mode MODE and data type TYPE.
2418 (TYPE is null for libcalls where that information may not be available.) */
2421 function_arg_advance (CUMULATIVE_ARGS *cum, /* current arg information */
2422 enum machine_mode mode, /* current arg mode */
2423 tree type, /* type of the argument or 0 if lib support */
2424 int named) /* whether or not the argument was named */
2427 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2428 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2430 if (TARGET_DEBUG_ARG)
2432 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2433 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2436 int int_nregs, sse_nregs;
2437 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2438 cum->words += words;
2439 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2441 cum->nregs -= int_nregs;
2442 cum->sse_nregs -= sse_nregs;
2443 cum->regno += int_nregs;
2444 cum->sse_regno += sse_nregs;
2447 cum->words += words;
2451 if (TARGET_SSE && mode == TImode)
2453 cum->sse_words += words;
2454 cum->sse_nregs -= 1;
2455 cum->sse_regno += 1;
2456 if (cum->sse_nregs <= 0)
2464 cum->words += words;
2465 cum->nregs -= words;
2466 cum->regno += words;
2468 if (cum->nregs <= 0)
2478 /* Define where to put the arguments to a function.
2479 Value is zero to push the argument on the stack,
2480 or a hard register in which to store the argument.
2482 MODE is the argument's machine mode.
2483 TYPE is the data type of the argument (as a tree).
2484 This is null for libcalls where that information may
2486 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2487 the preceding args and about the function being called.
2488 NAMED is nonzero if this argument is a named parameter
2489 (otherwise it is an extra parameter matching an ellipsis). */
2492 function_arg (CUMULATIVE_ARGS *cum, /* current arg information */
2493 enum machine_mode mode, /* current arg mode */
2494 tree type, /* type of the argument or 0 if lib support */
2495 int named) /* != 0 for normal args, == 0 for ... args */
2499 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2500 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2502 /* Handle a hidden AL argument containing number of registers for varargs
2503 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2505 if (mode == VOIDmode)
2508 return GEN_INT (cum->maybe_vaarg
2509 ? (cum->sse_nregs < 0
2517 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2518 &x86_64_int_parameter_registers [cum->regno],
2523 /* For now, pass fp/complex values on the stack. */
2535 if (words <= cum->nregs)
2537 int regno = cum->regno;
2539 /* Fastcall allocates the first two DWORD (SImode) or
2540 smaller arguments to ECX and EDX. */
2543 if (mode == BLKmode || mode == DImode)
2546 /* ECX not EAX is the first allocated register. */
2550 ret = gen_rtx_REG (mode, regno);
2555 ret = gen_rtx_REG (mode, cum->sse_regno);
2559 if (TARGET_DEBUG_ARG)
2562 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2563 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2566 print_simple_rtl (stderr, ret);
2568 fprintf (stderr, ", stack");
2570 fprintf (stderr, " )\n");
2576 /* A C expression that indicates when an argument must be passed by
2577 reference. If nonzero for an argument, a copy of that argument is
2578 made in memory and a pointer to the argument is passed instead of
2579 the argument itself. The pointer is passed in whatever way is
2580 appropriate for passing a pointer to that type. */
2583 function_arg_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2584 enum machine_mode mode ATTRIBUTE_UNUSED,
2585 tree type, int named ATTRIBUTE_UNUSED)
2590 if (type && int_size_in_bytes (type) == -1)
2592 if (TARGET_DEBUG_ARG)
2593 fprintf (stderr, "function_arg_pass_by_reference\n");
2600 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
2603 contains_128bit_aligned_vector_p (tree type)
2605 enum machine_mode mode = TYPE_MODE (type);
2606 if (SSE_REG_MODE_P (mode)
2607 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2609 if (TYPE_ALIGN (type) < 128)
2612 if (AGGREGATE_TYPE_P (type))
2614 /* Walk the aggregates recursively. */
2615 if (TREE_CODE (type) == RECORD_TYPE
2616 || TREE_CODE (type) == UNION_TYPE
2617 || TREE_CODE (type) == QUAL_UNION_TYPE)
2621 if (TYPE_BINFO (type) != NULL
2622 && TYPE_BINFO_BASETYPES (type) != NULL)
2624 tree bases = TYPE_BINFO_BASETYPES (type);
2625 int n_bases = TREE_VEC_LENGTH (bases);
2628 for (i = 0; i < n_bases; ++i)
2630 tree binfo = TREE_VEC_ELT (bases, i);
2631 tree type = BINFO_TYPE (binfo);
2633 if (contains_128bit_aligned_vector_p (type))
2637 /* And now merge the fields of structure. */
2638 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2640 if (TREE_CODE (field) == FIELD_DECL
2641 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2645 /* Just for use if some languages passes arrays by value. */
2646 else if (TREE_CODE (type) == ARRAY_TYPE)
2648 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2657 /* Gives the alignment boundary, in bits, of an argument with the
2658 specified mode and type. */
2661 ix86_function_arg_boundary (enum machine_mode mode, tree type)
2665 align = TYPE_ALIGN (type);
2667 align = GET_MODE_ALIGNMENT (mode);
2668 if (align < PARM_BOUNDARY)
2669 align = PARM_BOUNDARY;
2672 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2673 make an exception for SSE modes since these require 128bit
2676 The handling here differs from field_alignment. ICC aligns MMX
2677 arguments to 4 byte boundaries, while structure fields are aligned
2678 to 8 byte boundaries. */
2681 if (!SSE_REG_MODE_P (mode))
2682 align = PARM_BOUNDARY;
2686 if (!contains_128bit_aligned_vector_p (type))
2687 align = PARM_BOUNDARY;
2695 /* Return true if N is a possible register number of function value. */
2697 ix86_function_value_regno_p (int regno)
2701 return ((regno) == 0
2702 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2703 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2705 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2706 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2707 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2710 /* Define how to find the value returned by a function.
2711 VALTYPE is the data type of the value (as a tree).
2712 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2713 otherwise, FUNC is 0. */
2715 ix86_function_value (tree valtype)
2719 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2720 REGPARM_MAX, SSE_REGPARM_MAX,
2721 x86_64_int_return_registers, 0);
2722 /* For zero sized structures, construct_container return NULL, but we need
2723 to keep rest of compiler happy by returning meaningful value. */
2725 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2729 return gen_rtx_REG (TYPE_MODE (valtype),
2730 ix86_value_regno (TYPE_MODE (valtype)));
2733 /* Return false iff type is returned in memory. */
2735 ix86_return_in_memory (tree type)
2737 int needed_intregs, needed_sseregs, size;
2738 enum machine_mode mode = TYPE_MODE (type);
2741 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
2743 if (mode == BLKmode)
2746 size = int_size_in_bytes (type);
2748 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
2751 if (VECTOR_MODE_P (mode) || mode == TImode)
2753 /* User-created vectors small enough to fit in EAX. */
2757 /* MMX/3dNow values are returned on the stack, since we've
2758 got to EMMS/FEMMS before returning. */
2762 /* SSE values are returned in XMM0. */
2763 /* ??? Except when it doesn't exist? We have a choice of
2764 either (1) being abi incompatible with a -march switch,
2765 or (2) generating an error here. Given no good solution,
2766 I think the safest thing is one warning. The user won't
2767 be able to use -Werror, but... */
2778 warning ("SSE vector return without SSE enabled "
2793 /* Define how to find the value returned by a library function
2794 assuming the value has mode MODE. */
2796 ix86_libcall_value (enum machine_mode mode)
2806 return gen_rtx_REG (mode, FIRST_SSE_REG);
2809 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2814 return gen_rtx_REG (mode, 0);
2818 return gen_rtx_REG (mode, ix86_value_regno (mode));
2821 /* Given a mode, return the register to use for a return value. */
2824 ix86_value_regno (enum machine_mode mode)
2826 /* Floating point return values in %st(0). */
2827 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2828 return FIRST_FLOAT_REG;
2829 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
2830 we prevent this case when sse is not available. */
2831 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
2832 return FIRST_SSE_REG;
2833 /* Everything else in %eax. */
2837 /* Create the va_list data type. */
2840 ix86_build_builtin_va_list (void)
2842 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2844 /* For i386 we use plain pointer to argument area. */
2846 return build_pointer_type (char_type_node);
2848 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2849 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2851 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2852 unsigned_type_node);
2853 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2854 unsigned_type_node);
2855 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2857 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2860 DECL_FIELD_CONTEXT (f_gpr) = record;
2861 DECL_FIELD_CONTEXT (f_fpr) = record;
2862 DECL_FIELD_CONTEXT (f_ovf) = record;
2863 DECL_FIELD_CONTEXT (f_sav) = record;
2865 TREE_CHAIN (record) = type_decl;
2866 TYPE_NAME (record) = type_decl;
2867 TYPE_FIELDS (record) = f_gpr;
2868 TREE_CHAIN (f_gpr) = f_fpr;
2869 TREE_CHAIN (f_fpr) = f_ovf;
2870 TREE_CHAIN (f_ovf) = f_sav;
2872 layout_type (record);
2874 /* The correct type is an array type of one element. */
2875 return build_array_type (record, build_index_type (size_zero_node));
2878 /* Perform any needed actions needed for a function that is receiving a
2879 variable number of arguments.
2883 MODE and TYPE are the mode and type of the current parameter.
2885 PRETEND_SIZE is a variable that should be set to the amount of stack
2886 that must be pushed by the prolog to pretend that our caller pushed
2889 Normally, this macro will push all remaining incoming registers on the
2890 stack and set PRETEND_SIZE to the length of the registers pushed. */
2893 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
2894 tree type, int *pretend_size ATTRIBUTE_UNUSED,
2897 CUMULATIVE_ARGS next_cum;
2898 rtx save_area = NULL_RTX, mem;
2911 /* Indicate to allocate space on the stack for varargs save area. */
2912 ix86_save_varrargs_registers = 1;
2914 cfun->stack_alignment_needed = 128;
2916 fntype = TREE_TYPE (current_function_decl);
2917 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2918 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2919 != void_type_node));
2921 /* For varargs, we do not want to skip the dummy va_dcl argument.
2922 For stdargs, we do want to skip the last named argument. */
2925 function_arg_advance (&next_cum, mode, type, 1);
2928 save_area = frame_pointer_rtx;
2930 set = get_varargs_alias_set ();
2932 for (i = next_cum.regno; i < ix86_regparm; i++)
2934 mem = gen_rtx_MEM (Pmode,
2935 plus_constant (save_area, i * UNITS_PER_WORD));
2936 set_mem_alias_set (mem, set);
2937 emit_move_insn (mem, gen_rtx_REG (Pmode,
2938 x86_64_int_parameter_registers[i]));
2941 if (next_cum.sse_nregs)
2943 /* Now emit code to save SSE registers. The AX parameter contains number
2944 of SSE parameter registers used to call this function. We use
2945 sse_prologue_save insn template that produces computed jump across
2946 SSE saves. We need some preparation work to get this working. */
2948 label = gen_label_rtx ();
2949 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2951 /* Compute address to jump to :
2952 label - 5*eax + nnamed_sse_arguments*5 */
2953 tmp_reg = gen_reg_rtx (Pmode);
2954 nsse_reg = gen_reg_rtx (Pmode);
2955 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2956 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2957 gen_rtx_MULT (Pmode, nsse_reg,
2959 if (next_cum.sse_regno)
2962 gen_rtx_CONST (DImode,
2963 gen_rtx_PLUS (DImode,
2965 GEN_INT (next_cum.sse_regno * 4))));
2967 emit_move_insn (nsse_reg, label_ref);
2968 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2970 /* Compute address of memory block we save into. We always use pointer
2971 pointing 127 bytes after first byte to store - this is needed to keep
2972 instruction size limited by 4 bytes. */
2973 tmp_reg = gen_reg_rtx (Pmode);
2974 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2975 plus_constant (save_area,
2976 8 * REGPARM_MAX + 127)));
2977 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2978 set_mem_alias_set (mem, set);
2979 set_mem_align (mem, BITS_PER_WORD);
2981 /* And finally do the dirty job! */
2982 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2983 GEN_INT (next_cum.sse_regno), label));
2988 /* Implement va_start. */
2991 ix86_va_start (tree valist, rtx nextarg)
2993 HOST_WIDE_INT words, n_gpr, n_fpr;
2994 tree f_gpr, f_fpr, f_ovf, f_sav;
2995 tree gpr, fpr, ovf, sav, t;
2997 /* Only 64bit target needs something special. */
3000 std_expand_builtin_va_start (valist, nextarg);
3004 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3005 f_fpr = TREE_CHAIN (f_gpr);
3006 f_ovf = TREE_CHAIN (f_fpr);
3007 f_sav = TREE_CHAIN (f_ovf);
3009 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3010 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3011 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3012 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3013 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3015 /* Count number of gp and fp argument registers used. */
3016 words = current_function_args_info.words;
3017 n_gpr = current_function_args_info.regno;
3018 n_fpr = current_function_args_info.sse_regno;
3020 if (TARGET_DEBUG_ARG)
3021 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3022 (int) words, (int) n_gpr, (int) n_fpr);
3024 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3025 build_int_2 (n_gpr * 8, 0));
3026 TREE_SIDE_EFFECTS (t) = 1;
3027 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3029 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3030 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
3031 TREE_SIDE_EFFECTS (t) = 1;
3032 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3034 /* Find the overflow area. */
3035 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3037 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3038 build_int_2 (words * UNITS_PER_WORD, 0));
3039 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3040 TREE_SIDE_EFFECTS (t) = 1;
3041 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3043 /* Find the register save area.
3044 Prologue of the function save it right above stack frame. */
3045 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3046 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3047 TREE_SIDE_EFFECTS (t) = 1;
3048 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3051 /* Implement va_arg. */
3053 ix86_va_arg (tree valist, tree type)
3055 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3056 tree f_gpr, f_fpr, f_ovf, f_sav;
3057 tree gpr, fpr, ovf, sav, t;
3059 rtx lab_false, lab_over = NULL_RTX;
3064 /* Only 64bit target needs something special. */
3067 return std_expand_builtin_va_arg (valist, type);
3070 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3071 f_fpr = TREE_CHAIN (f_gpr);
3072 f_ovf = TREE_CHAIN (f_fpr);
3073 f_sav = TREE_CHAIN (f_ovf);
3075 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3076 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3077 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3078 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3079 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3081 size = int_size_in_bytes (type);
3084 /* Passed by reference. */
3086 type = build_pointer_type (type);
3087 size = int_size_in_bytes (type);
3089 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3091 container = construct_container (TYPE_MODE (type), type, 0,
3092 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3094 * Pull the value out of the saved registers ...
3097 addr_rtx = gen_reg_rtx (Pmode);
3101 rtx int_addr_rtx, sse_addr_rtx;
3102 int needed_intregs, needed_sseregs;
3105 lab_over = gen_label_rtx ();
3106 lab_false = gen_label_rtx ();
3108 examine_argument (TYPE_MODE (type), type, 0,
3109 &needed_intregs, &needed_sseregs);
3112 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
3113 || TYPE_ALIGN (type) > 128);
3115 /* In case we are passing structure, verify that it is consecutive block
3116 on the register save area. If not we need to do moves. */
3117 if (!need_temp && !REG_P (container))
3119 /* Verify that all registers are strictly consecutive */
3120 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3124 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3126 rtx slot = XVECEXP (container, 0, i);
3127 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3128 || INTVAL (XEXP (slot, 1)) != i * 16)
3136 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3138 rtx slot = XVECEXP (container, 0, i);
3139 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3140 || INTVAL (XEXP (slot, 1)) != i * 8)
3147 int_addr_rtx = addr_rtx;
3148 sse_addr_rtx = addr_rtx;
3152 int_addr_rtx = gen_reg_rtx (Pmode);
3153 sse_addr_rtx = gen_reg_rtx (Pmode);
3155 /* First ensure that we fit completely in registers. */
3158 emit_cmp_and_jump_insns (expand_expr
3159 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
3160 GEN_INT ((REGPARM_MAX - needed_intregs +
3161 1) * 8), GE, const1_rtx, SImode,
3166 emit_cmp_and_jump_insns (expand_expr
3167 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
3168 GEN_INT ((SSE_REGPARM_MAX -
3169 needed_sseregs + 1) * 16 +
3170 REGPARM_MAX * 8), GE, const1_rtx,
3171 SImode, 1, lab_false);
3174 /* Compute index to start of area used for integer regs. */
3177 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
3178 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
3179 if (r != int_addr_rtx)
3180 emit_move_insn (int_addr_rtx, r);
3184 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
3185 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
3186 if (r != sse_addr_rtx)
3187 emit_move_insn (sse_addr_rtx, r);
3195 /* Never use the memory itself, as it has the alias set. */
3196 x = XEXP (assign_temp (type, 0, 1, 0), 0);
3197 mem = gen_rtx_MEM (BLKmode, x);
3198 force_operand (x, addr_rtx);
3199 set_mem_alias_set (mem, get_varargs_alias_set ());
3200 set_mem_align (mem, BITS_PER_UNIT);
3202 for (i = 0; i < XVECLEN (container, 0); i++)
3204 rtx slot = XVECEXP (container, 0, i);
3205 rtx reg = XEXP (slot, 0);
3206 enum machine_mode mode = GET_MODE (reg);
3212 if (SSE_REGNO_P (REGNO (reg)))
3214 src_addr = sse_addr_rtx;
3215 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3219 src_addr = int_addr_rtx;
3220 src_offset = REGNO (reg) * 8;
3222 src_mem = gen_rtx_MEM (mode, src_addr);
3223 set_mem_alias_set (src_mem, get_varargs_alias_set ());
3224 src_mem = adjust_address (src_mem, mode, src_offset);
3225 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
3226 emit_move_insn (dest_mem, src_mem);
3233 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3234 build_int_2 (needed_intregs * 8, 0));
3235 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3236 TREE_SIDE_EFFECTS (t) = 1;
3237 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3242 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3243 build_int_2 (needed_sseregs * 16, 0));
3244 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3245 TREE_SIDE_EFFECTS (t) = 1;
3246 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3249 emit_jump_insn (gen_jump (lab_over));
3251 emit_label (lab_false);
3254 /* ... otherwise out of the overflow area. */
3256 /* Care for on-stack alignment if needed. */
3257 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3261 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3262 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3263 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3267 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3269 emit_move_insn (addr_rtx, r);
3272 build (PLUS_EXPR, TREE_TYPE (t), t,
3273 build_int_2 (rsize * UNITS_PER_WORD, 0));
3274 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3275 TREE_SIDE_EFFECTS (t) = 1;
3276 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3279 emit_label (lab_over);
3283 r = gen_rtx_MEM (Pmode, addr_rtx);
3284 set_mem_alias_set (r, get_varargs_alias_set ());
3285 emit_move_insn (addr_rtx, r);
3291 /* Return nonzero if OP is either a i387 or SSE fp register. */
3293 any_fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3295 return ANY_FP_REG_P (op);
3298 /* Return nonzero if OP is an i387 fp register. */
3300 fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3302 return FP_REG_P (op);
3305 /* Return nonzero if OP is a non-fp register_operand. */
3307 register_and_not_any_fp_reg_operand (rtx op, enum machine_mode mode)
3309 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3312 /* Return nonzero if OP is a register operand other than an
3313 i387 fp register. */
3315 register_and_not_fp_reg_operand (rtx op, enum machine_mode mode)
3317 return register_operand (op, mode) && !FP_REG_P (op);
3320 /* Return nonzero if OP is general operand representable on x86_64. */
3323 x86_64_general_operand (rtx op, enum machine_mode mode)
3326 return general_operand (op, mode);
3327 if (nonimmediate_operand (op, mode))
3329 return x86_64_sign_extended_value (op);
3332 /* Return nonzero if OP is general operand representable on x86_64
3333 as either sign extended or zero extended constant. */
3336 x86_64_szext_general_operand (rtx op, enum machine_mode mode)
3339 return general_operand (op, mode);
3340 if (nonimmediate_operand (op, mode))
3342 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3345 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3348 x86_64_nonmemory_operand (rtx op, enum machine_mode mode)
3351 return nonmemory_operand (op, mode);
3352 if (register_operand (op, mode))
3354 return x86_64_sign_extended_value (op);
3357 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
3360 x86_64_movabs_operand (rtx op, enum machine_mode mode)
3362 if (!TARGET_64BIT || !flag_pic)
3363 return nonmemory_operand (op, mode);
3364 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3366 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3371 /* Return nonzero if OPNUM's MEM should be matched
3372 in movabs* patterns. */
3375 ix86_check_movabs (rtx insn, int opnum)
3379 set = PATTERN (insn);
3380 if (GET_CODE (set) == PARALLEL)
3381 set = XVECEXP (set, 0, 0);
3382 if (GET_CODE (set) != SET)
3384 mem = XEXP (set, opnum);
3385 while (GET_CODE (mem) == SUBREG)
3386 mem = SUBREG_REG (mem);
3387 if (GET_CODE (mem) != MEM)
3389 return (volatile_ok || !MEM_VOLATILE_P (mem));
3392 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3395 x86_64_szext_nonmemory_operand (rtx op, enum machine_mode mode)
3398 return nonmemory_operand (op, mode);
3399 if (register_operand (op, mode))
3401 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3404 /* Return nonzero if OP is immediate operand representable on x86_64. */
3407 x86_64_immediate_operand (rtx op, enum machine_mode mode)
3410 return immediate_operand (op, mode);
3411 return x86_64_sign_extended_value (op);
3414 /* Return nonzero if OP is immediate operand representable on x86_64. */
3417 x86_64_zext_immediate_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3419 return x86_64_zero_extended_value (op);
3422 /* Return nonzero if OP is (const_int 1), else return zero. */
3425 const_int_1_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3427 return op == const1_rtx;
3430 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3431 for shift & compare patterns, as shifting by 0 does not change flags),
3432 else return zero. */
3435 const_int_1_31_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3437 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3440 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3441 reference and a constant. */
3444 symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3446 switch (GET_CODE (op))
3454 if (GET_CODE (op) == SYMBOL_REF
3455 || GET_CODE (op) == LABEL_REF
3456 || (GET_CODE (op) == UNSPEC
3457 && (XINT (op, 1) == UNSPEC_GOT
3458 || XINT (op, 1) == UNSPEC_GOTOFF
3459 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3461 if (GET_CODE (op) != PLUS
3462 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3466 if (GET_CODE (op) == SYMBOL_REF
3467 || GET_CODE (op) == LABEL_REF)
3469 /* Only @GOTOFF gets offsets. */
3470 if (GET_CODE (op) != UNSPEC
3471 || XINT (op, 1) != UNSPEC_GOTOFF)
3474 op = XVECEXP (op, 0, 0);
3475 if (GET_CODE (op) == SYMBOL_REF
3476 || GET_CODE (op) == LABEL_REF)
3485 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3488 pic_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3490 if (GET_CODE (op) != CONST)
3495 if (GET_CODE (op) == UNSPEC
3496 && XINT (op, 1) == UNSPEC_GOTPCREL)
3498 if (GET_CODE (op) == PLUS
3499 && GET_CODE (XEXP (op, 0)) == UNSPEC
3500 && XINT (XEXP (op, 0), 1) == UNSPEC_GOTPCREL)
3505 if (GET_CODE (op) == UNSPEC)
3507 if (GET_CODE (op) != PLUS
3508 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3511 if (GET_CODE (op) == UNSPEC)
3517 /* Return true if OP is a symbolic operand that resolves locally. */
3520 local_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3522 if (GET_CODE (op) == CONST
3523 && GET_CODE (XEXP (op, 0)) == PLUS
3524 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3525 op = XEXP (XEXP (op, 0), 0);
3527 if (GET_CODE (op) == LABEL_REF)
3530 if (GET_CODE (op) != SYMBOL_REF)
3533 if (SYMBOL_REF_LOCAL_P (op))
3536 /* There is, however, a not insubstantial body of code in the rest of
3537 the compiler that assumes it can just stick the results of
3538 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3539 /* ??? This is a hack. Should update the body of the compiler to
3540 always create a DECL an invoke targetm.encode_section_info. */
3541 if (strncmp (XSTR (op, 0), internal_label_prefix,
3542 internal_label_prefix_len) == 0)
3548 /* Test for various thread-local symbols. */
3551 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3553 if (GET_CODE (op) != SYMBOL_REF)
3555 return SYMBOL_REF_TLS_MODEL (op);
3559 tls_symbolic_operand_1 (rtx op, enum tls_model kind)
3561 if (GET_CODE (op) != SYMBOL_REF)
3563 return SYMBOL_REF_TLS_MODEL (op) == kind;
3567 global_dynamic_symbolic_operand (rtx op,
3568 enum machine_mode mode ATTRIBUTE_UNUSED)
3570 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3574 local_dynamic_symbolic_operand (rtx op,
3575 enum machine_mode mode ATTRIBUTE_UNUSED)
3577 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3581 initial_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3583 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3587 local_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3589 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3592 /* Test for a valid operand for a call instruction. Don't allow the
3593 arg pointer register or virtual regs since they may decay into
3594 reg + const, which the patterns can't handle. */
3597 call_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3599 /* Disallow indirect through a virtual register. This leads to
3600 compiler aborts when trying to eliminate them. */
3601 if (GET_CODE (op) == REG
3602 && (op == arg_pointer_rtx
3603 || op == frame_pointer_rtx
3604 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3605 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3608 /* Disallow `call 1234'. Due to varying assembler lameness this
3609 gets either rejected or translated to `call .+1234'. */
3610 if (GET_CODE (op) == CONST_INT)
3613 /* Explicitly allow SYMBOL_REF even if pic. */
3614 if (GET_CODE (op) == SYMBOL_REF)
3617 /* Otherwise we can allow any general_operand in the address. */
3618 return general_operand (op, Pmode);
3621 /* Test for a valid operand for a call instruction. Don't allow the
3622 arg pointer register or virtual regs since they may decay into
3623 reg + const, which the patterns can't handle. */
3626 sibcall_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3628 /* Disallow indirect through a virtual register. This leads to
3629 compiler aborts when trying to eliminate them. */
3630 if (GET_CODE (op) == REG
3631 && (op == arg_pointer_rtx
3632 || op == frame_pointer_rtx
3633 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3634 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3637 /* Explicitly allow SYMBOL_REF even if pic. */
3638 if (GET_CODE (op) == SYMBOL_REF)
3641 /* Otherwise we can only allow register operands. */
3642 return register_operand (op, Pmode);
3646 constant_call_address_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3648 if (GET_CODE (op) == CONST
3649 && GET_CODE (XEXP (op, 0)) == PLUS
3650 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3651 op = XEXP (XEXP (op, 0), 0);
3652 return GET_CODE (op) == SYMBOL_REF;
3655 /* Match exactly zero and one. */
3658 const0_operand (rtx op, enum machine_mode mode)
3660 return op == CONST0_RTX (mode);
3664 const1_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3666 return op == const1_rtx;
3669 /* Match 2, 4, or 8. Used for leal multiplicands. */
3672 const248_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3674 return (GET_CODE (op) == CONST_INT
3675 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3679 const_0_to_3_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3681 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 4);
3685 const_0_to_7_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3687 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 8);
3691 const_0_to_15_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3693 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 16);
3697 const_0_to_255_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3699 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 256);
3703 /* True if this is a constant appropriate for an increment or decrement. */
3706 incdec_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3708 /* On Pentium4, the inc and dec operations causes extra dependency on flag
3709 registers, since carry flag is not set. */
3710 if (TARGET_PENTIUM4 && !optimize_size)
3712 return op == const1_rtx || op == constm1_rtx;
3715 /* Return nonzero if OP is acceptable as operand of DImode shift
3719 shiftdi_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3722 return nonimmediate_operand (op, mode);
3724 return register_operand (op, mode);
3727 /* Return false if this is the stack pointer, or any other fake
3728 register eliminable to the stack pointer. Otherwise, this is
3731 This is used to prevent esp from being used as an index reg.
3732 Which would only happen in pathological cases. */
3735 reg_no_sp_operand (rtx op, enum machine_mode mode)
3738 if (GET_CODE (t) == SUBREG)
3740 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3743 return register_operand (op, mode);
3747 mmx_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3749 return MMX_REG_P (op);
3752 /* Return false if this is any eliminable register. Otherwise
3756 general_no_elim_operand (rtx op, enum machine_mode mode)
3759 if (GET_CODE (t) == SUBREG)
3761 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3762 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3763 || t == virtual_stack_dynamic_rtx)
3766 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3767 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3770 return general_operand (op, mode);
3773 /* Return false if this is any eliminable register. Otherwise
3774 register_operand or const_int. */
3777 nonmemory_no_elim_operand (rtx op, enum machine_mode mode)
3780 if (GET_CODE (t) == SUBREG)
3782 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3783 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3784 || t == virtual_stack_dynamic_rtx)
3787 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3790 /* Return false if this is any eliminable register or stack register,
3791 otherwise work like register_operand. */
3794 index_register_operand (rtx op, enum machine_mode mode)
3797 if (GET_CODE (t) == SUBREG)
3801 if (t == arg_pointer_rtx
3802 || t == frame_pointer_rtx
3803 || t == virtual_incoming_args_rtx
3804 || t == virtual_stack_vars_rtx
3805 || t == virtual_stack_dynamic_rtx
3806 || REGNO (t) == STACK_POINTER_REGNUM)
3809 return general_operand (op, mode);
3812 /* Return true if op is a Q_REGS class register. */
3815 q_regs_operand (rtx op, enum machine_mode mode)
3817 if (mode != VOIDmode && GET_MODE (op) != mode)
3819 if (GET_CODE (op) == SUBREG)
3820 op = SUBREG_REG (op);
3821 return ANY_QI_REG_P (op);
3824 /* Return true if op is an flags register. */
3827 flags_reg_operand (rtx op, enum machine_mode mode)
3829 if (mode != VOIDmode && GET_MODE (op) != mode)
3831 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3834 /* Return true if op is a NON_Q_REGS class register. */
3837 non_q_regs_operand (rtx op, enum machine_mode mode)
3839 if (mode != VOIDmode && GET_MODE (op) != mode)
3841 if (GET_CODE (op) == SUBREG)
3842 op = SUBREG_REG (op);
3843 return NON_QI_REG_P (op);
3847 zero_extended_scalar_load_operand (rtx op,
3848 enum machine_mode mode ATTRIBUTE_UNUSED)
3851 if (GET_CODE (op) != MEM)
3853 op = maybe_get_pool_constant (op);
3856 if (GET_CODE (op) != CONST_VECTOR)
3859 (GET_MODE_SIZE (GET_MODE (op)) /
3860 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
3861 for (n_elts--; n_elts > 0; n_elts--)
3863 rtx elt = CONST_VECTOR_ELT (op, n_elts);
3864 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
3870 /* Return 1 when OP is operand acceptable for standard SSE move. */
3872 vector_move_operand (rtx op, enum machine_mode mode)
3874 if (nonimmediate_operand (op, mode))
3876 if (GET_MODE (op) != mode && mode != VOIDmode)
3878 return (op == CONST0_RTX (GET_MODE (op)));
3881 /* Return true if op if a valid address, and does not contain
3882 a segment override. */
3885 no_seg_address_operand (rtx op, enum machine_mode mode)
3887 struct ix86_address parts;
3889 if (! address_operand (op, mode))
3892 if (! ix86_decompose_address (op, &parts))
3895 return parts.seg == SEG_DEFAULT;
3898 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3901 sse_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3903 enum rtx_code code = GET_CODE (op);
3906 /* Operations supported directly. */
3916 /* These are equivalent to ones above in non-IEEE comparisons. */
3923 return !TARGET_IEEE_FP;
3928 /* Return 1 if OP is a valid comparison operator in valid mode. */
3930 ix86_comparison_operator (rtx op, enum machine_mode mode)
3932 enum machine_mode inmode;
3933 enum rtx_code code = GET_CODE (op);
3934 if (mode != VOIDmode && GET_MODE (op) != mode)
3936 if (GET_RTX_CLASS (code) != '<')
3938 inmode = GET_MODE (XEXP (op, 0));
3940 if (inmode == CCFPmode || inmode == CCFPUmode)
3942 enum rtx_code second_code, bypass_code;
3943 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3944 return (bypass_code == NIL && second_code == NIL);
3951 if (inmode == CCmode || inmode == CCGCmode
3952 || inmode == CCGOCmode || inmode == CCNOmode)
3955 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3956 if (inmode == CCmode)
3960 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3968 /* Return 1 if OP is a valid comparison operator testing carry flag
3971 ix86_carry_flag_operator (rtx op, enum machine_mode mode)
3973 enum machine_mode inmode;
3974 enum rtx_code code = GET_CODE (op);
3976 if (mode != VOIDmode && GET_MODE (op) != mode)
3978 if (GET_RTX_CLASS (code) != '<')
3980 inmode = GET_MODE (XEXP (op, 0));
3981 if (GET_CODE (XEXP (op, 0)) != REG
3982 || REGNO (XEXP (op, 0)) != 17
3983 || XEXP (op, 1) != const0_rtx)
3986 if (inmode == CCFPmode || inmode == CCFPUmode)
3988 enum rtx_code second_code, bypass_code;
3990 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3991 if (bypass_code != NIL || second_code != NIL)
3993 code = ix86_fp_compare_code_to_integer (code);
3995 else if (inmode != CCmode)
4000 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
4003 fcmov_comparison_operator (rtx op, enum machine_mode mode)
4005 enum machine_mode inmode;
4006 enum rtx_code code = GET_CODE (op);
4008 if (mode != VOIDmode && GET_MODE (op) != mode)
4010 if (GET_RTX_CLASS (code) != '<')
4012 inmode = GET_MODE (XEXP (op, 0));
4013 if (inmode == CCFPmode || inmode == CCFPUmode)
4015 enum rtx_code second_code, bypass_code;
4017 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4018 if (bypass_code != NIL || second_code != NIL)
4020 code = ix86_fp_compare_code_to_integer (code);
4022 /* i387 supports just limited amount of conditional codes. */
4025 case LTU: case GTU: case LEU: case GEU:
4026 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
4029 case ORDERED: case UNORDERED:
4037 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
4040 promotable_binary_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4042 switch (GET_CODE (op))
4045 /* Modern CPUs have same latency for HImode and SImode multiply,
4046 but 386 and 486 do HImode multiply faster. */
4047 return ix86_tune > PROCESSOR_I486;
4059 /* Nearly general operand, but accept any const_double, since we wish
4060 to be able to drop them into memory rather than have them get pulled
4064 cmp_fp_expander_operand (rtx op, enum machine_mode mode)
4066 if (mode != VOIDmode && mode != GET_MODE (op))
4068 if (GET_CODE (op) == CONST_DOUBLE)
4070 return general_operand (op, mode);
4073 /* Match an SI or HImode register for a zero_extract. */
4076 ext_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4079 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
4080 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
4083 if (!register_operand (op, VOIDmode))
4086 /* Be careful to accept only registers having upper parts. */
4087 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
4088 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
4091 /* Return 1 if this is a valid binary floating-point operation.
4092 OP is the expression matched, and MODE is its mode. */
4095 binary_fp_operator (rtx op, enum machine_mode mode)
4097 if (mode != VOIDmode && mode != GET_MODE (op))
4100 switch (GET_CODE (op))
4106 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
4114 mult_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4116 return GET_CODE (op) == MULT;
4120 div_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4122 return GET_CODE (op) == DIV;
4126 arith_or_logical_operator (rtx op, enum machine_mode mode)
4128 return ((mode == VOIDmode || GET_MODE (op) == mode)
4129 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
4130 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
4133 /* Returns 1 if OP is memory operand with a displacement. */
4136 memory_displacement_operand (rtx op, enum machine_mode mode)
4138 struct ix86_address parts;
4140 if (! memory_operand (op, mode))
4143 if (! ix86_decompose_address (XEXP (op, 0), &parts))
4146 return parts.disp != NULL_RTX;
4149 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
4150 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4152 ??? It seems likely that this will only work because cmpsi is an
4153 expander, and no actual insns use this. */
4156 cmpsi_operand (rtx op, enum machine_mode mode)
4158 if (nonimmediate_operand (op, mode))
4161 if (GET_CODE (op) == AND
4162 && GET_MODE (op) == SImode
4163 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
4164 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
4165 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
4166 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
4167 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
4168 && GET_CODE (XEXP (op, 1)) == CONST_INT)
4174 /* Returns 1 if OP is memory operand that can not be represented by the
4178 long_memory_operand (rtx op, enum machine_mode mode)
4180 if (! memory_operand (op, mode))
4183 return memory_address_length (op) != 0;
4186 /* Return nonzero if the rtx is known aligned. */
4189 aligned_operand (rtx op, enum machine_mode mode)
4191 struct ix86_address parts;
4193 if (!general_operand (op, mode))
4196 /* Registers and immediate operands are always "aligned". */
4197 if (GET_CODE (op) != MEM)
4200 /* Don't even try to do any aligned optimizations with volatiles. */
4201 if (MEM_VOLATILE_P (op))
4206 /* Pushes and pops are only valid on the stack pointer. */
4207 if (GET_CODE (op) == PRE_DEC
4208 || GET_CODE (op) == POST_INC)
4211 /* Decode the address. */
4212 if (! ix86_decompose_address (op, &parts))
4215 if (parts.base && GET_CODE (parts.base) == SUBREG)
4216 parts.base = SUBREG_REG (parts.base);
4217 if (parts.index && GET_CODE (parts.index) == SUBREG)
4218 parts.index = SUBREG_REG (parts.index);
4220 /* Look for some component that isn't known to be aligned. */
4224 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
4229 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
4234 if (GET_CODE (parts.disp) != CONST_INT
4235 || (INTVAL (parts.disp) & 3) != 0)
4239 /* Didn't find one -- this must be an aligned address. */
4243 /* Initialize the table of extra 80387 mathematical constants. */
4246 init_ext_80387_constants (void)
4248 static const char * cst[5] =
4250 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4251 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4252 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4253 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4254 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4258 for (i = 0; i < 5; i++)
4260 real_from_string (&ext_80387_constants_table[i], cst[i]);
4261 /* Ensure each constant is rounded to XFmode precision. */
4262 real_convert (&ext_80387_constants_table[i],
4263 XFmode, &ext_80387_constants_table[i]);
4266 ext_80387_constants_init = 1;
4269 /* Return true if the constant is something that can be loaded with
4270 a special instruction. */
4273 standard_80387_constant_p (rtx x)
4275 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4278 if (x == CONST0_RTX (GET_MODE (x)))
4280 if (x == CONST1_RTX (GET_MODE (x)))
4283 /* For XFmode constants, try to find a special 80387 instruction on
4284 those CPUs that benefit from them. */
4285 if (GET_MODE (x) == XFmode
4286 && x86_ext_80387_constants & TUNEMASK)
4291 if (! ext_80387_constants_init)
4292 init_ext_80387_constants ();
4294 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4295 for (i = 0; i < 5; i++)
4296 if (real_identical (&r, &ext_80387_constants_table[i]))
4303 /* Return the opcode of the special instruction to be used to load
4307 standard_80387_constant_opcode (rtx x)
4309 switch (standard_80387_constant_p (x))
4329 /* Return the CONST_DOUBLE representing the 80387 constant that is
4330 loaded by the specified special instruction. The argument IDX
4331 matches the return value from standard_80387_constant_p. */
4334 standard_80387_constant_rtx (int idx)
4338 if (! ext_80387_constants_init)
4339 init_ext_80387_constants ();
4355 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4359 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4362 standard_sse_constant_p (rtx x)
4364 if (x == const0_rtx)
4366 return (x == CONST0_RTX (GET_MODE (x)));
4369 /* Returns 1 if OP contains a symbol reference */
4372 symbolic_reference_mentioned_p (rtx op)
4377 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4380 fmt = GET_RTX_FORMAT (GET_CODE (op));
4381 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4387 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4388 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4392 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4399 /* Return 1 if it is appropriate to emit `ret' instructions in the
4400 body of a function. Do this only if the epilogue is simple, needing a
4401 couple of insns. Prior to reloading, we can't tell how many registers
4402 must be saved, so return 0 then. Return 0 if there is no frame
4403 marker to de-allocate.
4405 If NON_SAVING_SETJMP is defined and true, then it is not possible
4406 for the epilogue to be simple, so return 0. This is a special case
4407 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4408 until final, but jump_optimize may need to know sooner if a
4412 ix86_can_use_return_insn_p (void)
4414 struct ix86_frame frame;
4416 #ifdef NON_SAVING_SETJMP
4417 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4421 if (! reload_completed || frame_pointer_needed)
4424 /* Don't allow more than 32 pop, since that's all we can do
4425 with one instruction. */
4426 if (current_function_pops_args
4427 && current_function_args_size >= 32768)
4430 ix86_compute_frame_layout (&frame);
4431 return frame.to_allocate == 0 && frame.nregs == 0;
4434 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
4436 x86_64_sign_extended_value (rtx value)
4438 switch (GET_CODE (value))
4440 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
4441 to be at least 32 and this all acceptable constants are
4442 represented as CONST_INT. */
4444 if (HOST_BITS_PER_WIDE_INT == 32)
4448 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4449 return trunc_int_for_mode (val, SImode) == val;
4453 /* For certain code models, the symbolic references are known to fit.
4454 in CM_SMALL_PIC model we know it fits if it is local to the shared
4455 library. Don't count TLS SYMBOL_REFs here, since they should fit
4456 only if inside of UNSPEC handled below. */
4458 /* TLS symbols are not constant. */
4459 if (tls_symbolic_operand (value, Pmode))
4461 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4463 /* For certain code models, the code is near as well. */
4465 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4466 || ix86_cmodel == CM_KERNEL);
4468 /* We also may accept the offsetted memory references in certain special
4471 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4472 switch (XINT (XEXP (value, 0), 1))
4474 case UNSPEC_GOTPCREL:
4476 case UNSPEC_GOTNTPOFF:
4482 if (GET_CODE (XEXP (value, 0)) == PLUS)
4484 rtx op1 = XEXP (XEXP (value, 0), 0);
4485 rtx op2 = XEXP (XEXP (value, 0), 1);
4486 HOST_WIDE_INT offset;
4488 if (ix86_cmodel == CM_LARGE)
4490 if (GET_CODE (op2) != CONST_INT)
4492 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4493 switch (GET_CODE (op1))
4496 /* For CM_SMALL assume that latest object is 16MB before
4497 end of 31bits boundary. We may also accept pretty
4498 large negative constants knowing that all objects are
4499 in the positive half of address space. */
4500 if (ix86_cmodel == CM_SMALL
4501 && offset < 16*1024*1024
4502 && trunc_int_for_mode (offset, SImode) == offset)
4504 /* For CM_KERNEL we know that all object resist in the
4505 negative half of 32bits address space. We may not
4506 accept negative offsets, since they may be just off
4507 and we may accept pretty large positive ones. */
4508 if (ix86_cmodel == CM_KERNEL
4510 && trunc_int_for_mode (offset, SImode) == offset)
4514 /* These conditions are similar to SYMBOL_REF ones, just the
4515 constraints for code models differ. */
4516 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4517 && offset < 16*1024*1024
4518 && trunc_int_for_mode (offset, SImode) == offset)
4520 if (ix86_cmodel == CM_KERNEL
4522 && trunc_int_for_mode (offset, SImode) == offset)
4526 switch (XINT (op1, 1))
4531 && trunc_int_for_mode (offset, SImode) == offset)
4545 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
4547 x86_64_zero_extended_value (rtx value)
4549 switch (GET_CODE (value))
4552 if (HOST_BITS_PER_WIDE_INT == 32)
4553 return (GET_MODE (value) == VOIDmode
4554 && !CONST_DOUBLE_HIGH (value));
4558 if (HOST_BITS_PER_WIDE_INT == 32)
4559 return INTVAL (value) >= 0;
4561 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4564 /* For certain code models, the symbolic references are known to fit. */
4566 /* TLS symbols are not constant. */
4567 if (tls_symbolic_operand (value, Pmode))
4569 return ix86_cmodel == CM_SMALL;
4571 /* For certain code models, the code is near as well. */
4573 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4575 /* We also may accept the offsetted memory references in certain special
4578 if (GET_CODE (XEXP (value, 0)) == PLUS)
4580 rtx op1 = XEXP (XEXP (value, 0), 0);
4581 rtx op2 = XEXP (XEXP (value, 0), 1);
4583 if (ix86_cmodel == CM_LARGE)
4585 switch (GET_CODE (op1))
4589 /* For small code model we may accept pretty large positive
4590 offsets, since one bit is available for free. Negative
4591 offsets are limited by the size of NULL pointer area
4592 specified by the ABI. */
4593 if (ix86_cmodel == CM_SMALL
4594 && GET_CODE (op2) == CONST_INT
4595 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4596 && (trunc_int_for_mode (INTVAL (op2), SImode)
4599 /* ??? For the kernel, we may accept adjustment of
4600 -0x10000000, since we know that it will just convert
4601 negative address space to positive, but perhaps this
4602 is not worthwhile. */
4605 /* These conditions are similar to SYMBOL_REF ones, just the
4606 constraints for code models differ. */
4607 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4608 && GET_CODE (op2) == CONST_INT
4609 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4610 && (trunc_int_for_mode (INTVAL (op2), SImode)
4624 /* Value should be nonzero if functions must have frame pointers.
4625 Zero means the frame pointer need not be set up (and parms may
4626 be accessed via the stack pointer) in functions that seem suitable. */
4629 ix86_frame_pointer_required (void)
4631 /* If we accessed previous frames, then the generated code expects
4632 to be able to access the saved ebp value in our frame. */
4633 if (cfun->machine->accesses_prev_frame)
4636 /* Several x86 os'es need a frame pointer for other reasons,
4637 usually pertaining to setjmp. */
4638 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4641 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4642 the frame pointer by default. Turn it back on now if we've not
4643 got a leaf function. */
4644 if (TARGET_OMIT_LEAF_FRAME_POINTER
4645 && (!current_function_is_leaf))
4648 if (current_function_profile)
4654 /* Record that the current function accesses previous call frames. */
4657 ix86_setup_frame_addresses (void)
4659 cfun->machine->accesses_prev_frame = 1;
4662 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4663 # define USE_HIDDEN_LINKONCE 1
4665 # define USE_HIDDEN_LINKONCE 0
4668 static int pic_labels_used;
4670 /* Fills in the label name that should be used for a pc thunk for
4671 the given register. */
4674 get_pc_thunk_name (char name[32], unsigned int regno)
4676 if (USE_HIDDEN_LINKONCE)
4677 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4679 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4683 /* This function generates code for -fpic that loads %ebx with
4684 the return address of the caller and then returns. */
4687 ix86_file_end (void)
4692 for (regno = 0; regno < 8; ++regno)
4696 if (! ((pic_labels_used >> regno) & 1))
4699 get_pc_thunk_name (name, regno);
4701 if (USE_HIDDEN_LINKONCE)
4705 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4707 TREE_PUBLIC (decl) = 1;
4708 TREE_STATIC (decl) = 1;
4709 DECL_ONE_ONLY (decl) = 1;
4711 (*targetm.asm_out.unique_section) (decl, 0);
4712 named_section (decl, NULL, 0);
4714 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4715 fputs ("\t.hidden\t", asm_out_file);
4716 assemble_name (asm_out_file, name);
4717 fputc ('\n', asm_out_file);
4718 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4723 ASM_OUTPUT_LABEL (asm_out_file, name);
4726 xops[0] = gen_rtx_REG (SImode, regno);
4727 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4728 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4729 output_asm_insn ("ret", xops);
4732 if (NEED_INDICATE_EXEC_STACK)
4733 file_end_indicate_exec_stack ();
4736 /* Emit code for the SET_GOT patterns. */
4739 output_set_got (rtx dest)
4744 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4746 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4748 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4751 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4753 output_asm_insn ("call\t%a2", xops);
4756 /* Output the "canonical" label name ("Lxx$pb") here too. This
4757 is what will be referred to by the Mach-O PIC subsystem. */
4758 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4760 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4761 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4764 output_asm_insn ("pop{l}\t%0", xops);
4769 get_pc_thunk_name (name, REGNO (dest));
4770 pic_labels_used |= 1 << REGNO (dest);
4772 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4773 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4774 output_asm_insn ("call\t%X2", xops);
4777 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4778 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4779 else if (!TARGET_MACHO)
4780 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4785 /* Generate an "push" pattern for input ARG. */
4790 return gen_rtx_SET (VOIDmode,
4792 gen_rtx_PRE_DEC (Pmode,
4793 stack_pointer_rtx)),
4797 /* Return >= 0 if there is an unused call-clobbered register available
4798 for the entire function. */
4801 ix86_select_alt_pic_regnum (void)
4803 if (current_function_is_leaf && !current_function_profile)
4806 for (i = 2; i >= 0; --i)
4807 if (!regs_ever_live[i])
4811 return INVALID_REGNUM;
4814 /* Return 1 if we need to save REGNO. */
4816 ix86_save_reg (unsigned int regno, int maybe_eh_return)
4818 if (pic_offset_table_rtx
4819 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4820 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4821 || current_function_profile
4822 || current_function_calls_eh_return
4823 || current_function_uses_const_pool))
4825 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4830 if (current_function_calls_eh_return && maybe_eh_return)
4835 unsigned test = EH_RETURN_DATA_REGNO (i);
4836 if (test == INVALID_REGNUM)
4843 return (regs_ever_live[regno]
4844 && !call_used_regs[regno]
4845 && !fixed_regs[regno]
4846 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4849 /* Return number of registers to be saved on the stack. */
4852 ix86_nsaved_regs (void)
4857 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4858 if (ix86_save_reg (regno, true))
4863 /* Return the offset between two registers, one to be eliminated, and the other
4864 its replacement, at the start of a routine. */
4867 ix86_initial_elimination_offset (int from, int to)
4869 struct ix86_frame frame;
4870 ix86_compute_frame_layout (&frame);
4872 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4873 return frame.hard_frame_pointer_offset;
4874 else if (from == FRAME_POINTER_REGNUM
4875 && to == HARD_FRAME_POINTER_REGNUM)
4876 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4879 if (to != STACK_POINTER_REGNUM)
4881 else if (from == ARG_POINTER_REGNUM)
4882 return frame.stack_pointer_offset;
4883 else if (from != FRAME_POINTER_REGNUM)
4886 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4890 /* Fill structure ix86_frame about frame of currently computed function. */
4893 ix86_compute_frame_layout (struct ix86_frame *frame)
4895 HOST_WIDE_INT total_size;
4896 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4897 HOST_WIDE_INT offset;
4898 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4899 HOST_WIDE_INT size = get_frame_size ();
4901 frame->nregs = ix86_nsaved_regs ();
4904 /* During reload iteration the amount of registers saved can change.
4905 Recompute the value as needed. Do not recompute when amount of registers
4906 didn't change as reload does mutiple calls to the function and does not
4907 expect the decision to change within single iteration. */
4909 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4911 int count = frame->nregs;
4913 cfun->machine->use_fast_prologue_epilogue_nregs = count;
4914 /* The fast prologue uses move instead of push to save registers. This
4915 is significantly longer, but also executes faster as modern hardware
4916 can execute the moves in parallel, but can't do that for push/pop.
4918 Be careful about choosing what prologue to emit: When function takes
4919 many instructions to execute we may use slow version as well as in
4920 case function is known to be outside hot spot (this is known with
4921 feedback only). Weight the size of function by number of registers
4922 to save as it is cheap to use one or two push instructions but very
4923 slow to use many of them. */
4925 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4926 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4927 || (flag_branch_probabilities
4928 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4929 cfun->machine->use_fast_prologue_epilogue = false;
4931 cfun->machine->use_fast_prologue_epilogue
4932 = !expensive_function_p (count);
4934 if (TARGET_PROLOGUE_USING_MOVE
4935 && cfun->machine->use_fast_prologue_epilogue)
4936 frame->save_regs_using_mov = true;
4938 frame->save_regs_using_mov = false;
4941 /* Skip return address and saved base pointer. */
4942 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4944 frame->hard_frame_pointer_offset = offset;
4946 /* Do some sanity checking of stack_alignment_needed and
4947 preferred_alignment, since i386 port is the only using those features
4948 that may break easily. */
4950 if (size && !stack_alignment_needed)
4952 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4954 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4956 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4959 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4960 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4962 /* Register save area */
4963 offset += frame->nregs * UNITS_PER_WORD;
4966 if (ix86_save_varrargs_registers)
4968 offset += X86_64_VARARGS_SIZE;
4969 frame->va_arg_size = X86_64_VARARGS_SIZE;
4972 frame->va_arg_size = 0;
4974 /* Align start of frame for local function. */
4975 frame->padding1 = ((offset + stack_alignment_needed - 1)
4976 & -stack_alignment_needed) - offset;
4978 offset += frame->padding1;
4980 /* Frame pointer points here. */
4981 frame->frame_pointer_offset = offset;
4985 /* Add outgoing arguments area. Can be skipped if we eliminated
4986 all the function calls as dead code. */
4987 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4989 offset += current_function_outgoing_args_size;
4990 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4993 frame->outgoing_arguments_size = 0;
4995 /* Align stack boundary. Only needed if we're calling another function
4997 if (!current_function_is_leaf || current_function_calls_alloca)
4998 frame->padding2 = ((offset + preferred_alignment - 1)
4999 & -preferred_alignment) - offset;
5001 frame->padding2 = 0;
5003 offset += frame->padding2;
5005 /* We've reached end of stack frame. */
5006 frame->stack_pointer_offset = offset;
5008 /* Size prologue needs to allocate. */
5009 frame->to_allocate =
5010 (size + frame->padding1 + frame->padding2
5011 + frame->outgoing_arguments_size + frame->va_arg_size);
5013 if ((!frame->to_allocate && frame->nregs <= 1)
5014 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5015 frame->save_regs_using_mov = false;
5017 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5018 && current_function_is_leaf)
5020 frame->red_zone_size = frame->to_allocate;
5021 if (frame->save_regs_using_mov)
5022 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5023 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5024 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5027 frame->red_zone_size = 0;
5028 frame->to_allocate -= frame->red_zone_size;
5029 frame->stack_pointer_offset -= frame->red_zone_size;
5031 fprintf (stderr, "nregs: %i\n", frame->nregs);
5032 fprintf (stderr, "size: %i\n", size);
5033 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5034 fprintf (stderr, "padding1: %i\n", frame->padding1);
5035 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5036 fprintf (stderr, "padding2: %i\n", frame->padding2);
5037 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5038 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5039 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5040 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5041 frame->hard_frame_pointer_offset);
5042 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5046 /* Emit code to save registers in the prologue. */
5049 ix86_emit_save_regs (void)
5054 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5055 if (ix86_save_reg (regno, true))
5057 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5058 RTX_FRAME_RELATED_P (insn) = 1;
5062 /* Emit code to save registers using MOV insns. First register
5063 is restored from POINTER + OFFSET. */
5065 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5070 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5071 if (ix86_save_reg (regno, true))
5073 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5075 gen_rtx_REG (Pmode, regno));
5076 RTX_FRAME_RELATED_P (insn) = 1;
5077 offset += UNITS_PER_WORD;
5081 /* Expand prologue or epilogue stack adjustment.
5082 The pattern exist to put a dependency on all ebp-based memory accesses.
5083 STYLE should be negative if instructions should be marked as frame related,
5084 zero if %r11 register is live and cannot be freely used and positive
5088 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5093 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5094 else if (x86_64_immediate_operand (offset, DImode))
5095 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5099 /* r11 is used by indirect sibcall return as well, set before the
5100 epilogue and used after the epilogue. ATM indirect sibcall
5101 shouldn't be used together with huge frame sizes in one
5102 function because of the frame_size check in sibcall.c. */
5105 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5106 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5108 RTX_FRAME_RELATED_P (insn) = 1;
5109 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5113 RTX_FRAME_RELATED_P (insn) = 1;
5116 /* Expand the prologue into a bunch of separate insns. */
5119 ix86_expand_prologue (void)
5123 struct ix86_frame frame;
5124 HOST_WIDE_INT allocate;
5126 ix86_compute_frame_layout (&frame);
5128 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5129 slower on all targets. Also sdb doesn't like it. */
5131 if (frame_pointer_needed)
5133 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5134 RTX_FRAME_RELATED_P (insn) = 1;
5136 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5137 RTX_FRAME_RELATED_P (insn) = 1;
5140 allocate = frame.to_allocate;
5142 if (!frame.save_regs_using_mov)
5143 ix86_emit_save_regs ();
5145 allocate += frame.nregs * UNITS_PER_WORD;
5147 /* When using red zone we may start register saving before allocating
5148 the stack frame saving one cycle of the prologue. */
5149 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5150 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5151 : stack_pointer_rtx,
5152 -frame.nregs * UNITS_PER_WORD);
5156 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5157 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5158 GEN_INT (-allocate), -1);
5161 /* Only valid for Win32. */
5162 rtx eax = gen_rtx_REG (SImode, 0);
5163 bool eax_live = ix86_eax_live_at_start_p ();
5170 emit_insn (gen_push (eax));
5174 insn = emit_move_insn (eax, GEN_INT (allocate));
5175 RTX_FRAME_RELATED_P (insn) = 1;
5177 insn = emit_insn (gen_allocate_stack_worker (eax));
5178 RTX_FRAME_RELATED_P (insn) = 1;
5182 rtx t = plus_constant (stack_pointer_rtx, allocate);
5183 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5187 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5189 if (!frame_pointer_needed || !frame.to_allocate)
5190 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5192 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5193 -frame.nregs * UNITS_PER_WORD);
5196 pic_reg_used = false;
5197 if (pic_offset_table_rtx
5198 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5199 || current_function_profile))
5201 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5203 if (alt_pic_reg_used != INVALID_REGNUM)
5204 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5206 pic_reg_used = true;
5211 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5213 /* Even with accurate pre-reload life analysis, we can wind up
5214 deleting all references to the pic register after reload.
5215 Consider if cross-jumping unifies two sides of a branch
5216 controlled by a comparison vs the only read from a global.
5217 In which case, allow the set_got to be deleted, though we're
5218 too late to do anything about the ebx save in the prologue. */
5219 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5222 /* Prevent function calls from be scheduled before the call to mcount.
5223 In the pic_reg_used case, make sure that the got load isn't deleted. */
5224 if (current_function_profile)
5225 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5228 /* Emit code to restore saved registers using MOV insns. First register
5229 is restored from POINTER + OFFSET. */
5231 ix86_emit_restore_regs_using_mov (rtx pointer, int offset, int maybe_eh_return)
5235 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5236 if (ix86_save_reg (regno, maybe_eh_return))
5238 emit_move_insn (gen_rtx_REG (Pmode, regno),
5239 adjust_address (gen_rtx_MEM (Pmode, pointer),
5241 offset += UNITS_PER_WORD;
5245 /* Restore function stack, frame, and registers. */
5248 ix86_expand_epilogue (int style)
5251 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5252 struct ix86_frame frame;
5253 HOST_WIDE_INT offset;
5255 ix86_compute_frame_layout (&frame);
5257 /* Calculate start of saved registers relative to ebp. Special care
5258 must be taken for the normal return case of a function using
5259 eh_return: the eax and edx registers are marked as saved, but not
5260 restored along this path. */
5261 offset = frame.nregs;
5262 if (current_function_calls_eh_return && style != 2)
5264 offset *= -UNITS_PER_WORD;
5266 /* If we're only restoring one register and sp is not valid then
5267 using a move instruction to restore the register since it's
5268 less work than reloading sp and popping the register.
5270 The default code result in stack adjustment using add/lea instruction,
5271 while this code results in LEAVE instruction (or discrete equivalent),
5272 so it is profitable in some other cases as well. Especially when there
5273 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5274 and there is exactly one register to pop. This heuristic may need some
5275 tuning in future. */
5276 if ((!sp_valid && frame.nregs <= 1)
5277 || (TARGET_EPILOGUE_USING_MOVE
5278 && cfun->machine->use_fast_prologue_epilogue
5279 && (frame.nregs > 1 || frame.to_allocate))
5280 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5281 || (frame_pointer_needed && TARGET_USE_LEAVE
5282 && cfun->machine->use_fast_prologue_epilogue
5283 && frame.nregs == 1)
5284 || current_function_calls_eh_return)
5286 /* Restore registers. We can use ebp or esp to address the memory
5287 locations. If both are available, default to ebp, since offsets
5288 are known to be small. Only exception is esp pointing directly to the
5289 end of block of saved registers, where we may simplify addressing
5292 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5293 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5294 frame.to_allocate, style == 2);
5296 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5297 offset, style == 2);
5299 /* eh_return epilogues need %ecx added to the stack pointer. */
5302 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5304 if (frame_pointer_needed)
5306 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5307 tmp = plus_constant (tmp, UNITS_PER_WORD);
5308 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5310 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5311 emit_move_insn (hard_frame_pointer_rtx, tmp);
5313 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5318 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5319 tmp = plus_constant (tmp, (frame.to_allocate
5320 + frame.nregs * UNITS_PER_WORD));
5321 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5324 else if (!frame_pointer_needed)
5325 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5326 GEN_INT (frame.to_allocate
5327 + frame.nregs * UNITS_PER_WORD),
5329 /* If not an i386, mov & pop is faster than "leave". */
5330 else if (TARGET_USE_LEAVE || optimize_size
5331 || !cfun->machine->use_fast_prologue_epilogue)
5332 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5335 pro_epilogue_adjust_stack (stack_pointer_rtx,
5336 hard_frame_pointer_rtx,
5339 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5341 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5346 /* First step is to deallocate the stack frame so that we can
5347 pop the registers. */
5350 if (!frame_pointer_needed)
5352 pro_epilogue_adjust_stack (stack_pointer_rtx,
5353 hard_frame_pointer_rtx,
5354 GEN_INT (offset), style);
5356 else if (frame.to_allocate)
5357 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5358 GEN_INT (frame.to_allocate), style);
5360 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5361 if (ix86_save_reg (regno, false))
5364 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5366 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5368 if (frame_pointer_needed)
5370 /* Leave results in shorter dependency chains on CPUs that are
5371 able to grok it fast. */
5372 if (TARGET_USE_LEAVE)
5373 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5374 else if (TARGET_64BIT)
5375 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5377 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5381 /* Sibcall epilogues don't want a return instruction. */
5385 if (current_function_pops_args && current_function_args_size)
5387 rtx popc = GEN_INT (current_function_pops_args);
5389 /* i386 can only pop 64K bytes. If asked to pop more, pop
5390 return address, do explicit add, and jump indirectly to the
5393 if (current_function_pops_args >= 65536)
5395 rtx ecx = gen_rtx_REG (SImode, 2);
5397 /* There is no "pascal" calling convention in 64bit ABI. */
5401 emit_insn (gen_popsi1 (ecx));
5402 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5403 emit_jump_insn (gen_return_indirect_internal (ecx));
5406 emit_jump_insn (gen_return_pop_internal (popc));
5409 emit_jump_insn (gen_return_internal ());
5412 /* Reset from the function's potential modifications. */
5415 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5416 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5418 if (pic_offset_table_rtx)
5419 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5422 /* Extract the parts of an RTL expression that is a valid memory address
5423 for an instruction. Return 0 if the structure of the address is
5424 grossly off. Return -1 if the address contains ASHIFT, so it is not
5425 strictly valid, but still used for computing length of lea instruction. */
5428 ix86_decompose_address (rtx addr, struct ix86_address *out)
5430 rtx base = NULL_RTX;
5431 rtx index = NULL_RTX;
5432 rtx disp = NULL_RTX;
5433 HOST_WIDE_INT scale = 1;
5434 rtx scale_rtx = NULL_RTX;
5436 enum ix86_address_seg seg = SEG_DEFAULT;
5438 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
5440 else if (GET_CODE (addr) == PLUS)
5450 addends[n++] = XEXP (op, 1);
5453 while (GET_CODE (op) == PLUS);
5458 for (i = n; i >= 0; --i)
5461 switch (GET_CODE (op))
5466 index = XEXP (op, 0);
5467 scale_rtx = XEXP (op, 1);
5471 if (XINT (op, 1) == UNSPEC_TP
5472 && TARGET_TLS_DIRECT_SEG_REFS
5473 && seg == SEG_DEFAULT)
5474 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5503 else if (GET_CODE (addr) == MULT)
5505 index = XEXP (addr, 0); /* index*scale */
5506 scale_rtx = XEXP (addr, 1);
5508 else if (GET_CODE (addr) == ASHIFT)
5512 /* We're called for lea too, which implements ashift on occasion. */
5513 index = XEXP (addr, 0);
5514 tmp = XEXP (addr, 1);
5515 if (GET_CODE (tmp) != CONST_INT)
5517 scale = INTVAL (tmp);
5518 if ((unsigned HOST_WIDE_INT) scale > 3)
5524 disp = addr; /* displacement */
5526 /* Extract the integral value of scale. */
5529 if (GET_CODE (scale_rtx) != CONST_INT)
5531 scale = INTVAL (scale_rtx);
5534 /* Allow arg pointer and stack pointer as index if there is not scaling. */
5535 if (base && index && scale == 1
5536 && (index == arg_pointer_rtx
5537 || index == frame_pointer_rtx
5538 || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
5545 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5546 if ((base == hard_frame_pointer_rtx
5547 || base == frame_pointer_rtx
5548 || base == arg_pointer_rtx) && !disp)
5551 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5552 Avoid this by transforming to [%esi+0]. */
5553 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5554 && base && !index && !disp
5556 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5559 /* Special case: encode reg+reg instead of reg*2. */
5560 if (!base && index && scale && scale == 2)
5561 base = index, scale = 1;
5563 /* Special case: scaling cannot be encoded without base or displacement. */
5564 if (!base && !disp && index && scale != 1)
5576 /* Return cost of the memory address x.
5577 For i386, it is better to use a complex address than let gcc copy
5578 the address into a reg and make a new pseudo. But not if the address
5579 requires to two regs - that would mean more pseudos with longer
5582 ix86_address_cost (rtx x)
5584 struct ix86_address parts;
5587 if (!ix86_decompose_address (x, &parts))
5590 if (parts.base && GET_CODE (parts.base) == SUBREG)
5591 parts.base = SUBREG_REG (parts.base);
5592 if (parts.index && GET_CODE (parts.index) == SUBREG)
5593 parts.index = SUBREG_REG (parts.index);
5595 /* More complex memory references are better. */
5596 if (parts.disp && parts.disp != const0_rtx)
5598 if (parts.seg != SEG_DEFAULT)
5601 /* Attempt to minimize number of registers in the address. */
5603 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5605 && (!REG_P (parts.index)
5606 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5610 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5612 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5613 && parts.base != parts.index)
5616 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
5617 since it's predecode logic can't detect the length of instructions
5618 and it degenerates to vector decoded. Increase cost of such
5619 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5620 to split such addresses or even refuse such addresses at all.
5622 Following addressing modes are affected:
5627 The first and last case may be avoidable by explicitly coding the zero in
5628 memory address, but I don't have AMD-K6 machine handy to check this
5632 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5633 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5634 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5640 /* If X is a machine specific address (i.e. a symbol or label being
5641 referenced as a displacement from the GOT implemented using an
5642 UNSPEC), then return the base term. Otherwise return X. */
5645 ix86_find_base_term (rtx x)
5651 if (GET_CODE (x) != CONST)
5654 if (GET_CODE (term) == PLUS
5655 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5656 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5657 term = XEXP (term, 0);
5658 if (GET_CODE (term) != UNSPEC
5659 || XINT (term, 1) != UNSPEC_GOTPCREL)
5662 term = XVECEXP (term, 0, 0);
5664 if (GET_CODE (term) != SYMBOL_REF
5665 && GET_CODE (term) != LABEL_REF)
5671 term = ix86_delegitimize_address (x);
5673 if (GET_CODE (term) != SYMBOL_REF
5674 && GET_CODE (term) != LABEL_REF)
5680 /* Determine if a given RTX is a valid constant. We already know this
5681 satisfies CONSTANT_P. */
5684 legitimate_constant_p (rtx x)
5688 switch (GET_CODE (x))
5691 /* TLS symbols are not constant. */
5692 if (tls_symbolic_operand (x, Pmode))
5697 inner = XEXP (x, 0);
5699 /* Offsets of TLS symbols are never valid.
5700 Discourage CSE from creating them. */
5701 if (GET_CODE (inner) == PLUS
5702 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5705 if (GET_CODE (inner) == PLUS)
5707 if (GET_CODE (XEXP (inner, 1)) != CONST_INT)
5709 inner = XEXP (inner, 0);
5712 /* Only some unspecs are valid as "constants". */
5713 if (GET_CODE (inner) == UNSPEC)
5714 switch (XINT (inner, 1))
5718 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5720 return local_dynamic_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5730 /* Otherwise we handle everything else in the move patterns. */
5734 /* Determine if it's legal to put X into the constant pool. This
5735 is not possible for the address of thread-local symbols, which
5736 is checked above. */
5739 ix86_cannot_force_const_mem (rtx x)
5741 return !legitimate_constant_p (x);
5744 /* Determine if a given RTX is a valid constant address. */
5747 constant_address_p (rtx x)
5749 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5752 /* Nonzero if the constant value X is a legitimate general operand
5753 when generating PIC code. It is given that flag_pic is on and
5754 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5757 legitimate_pic_operand_p (rtx x)
5761 switch (GET_CODE (x))
5764 inner = XEXP (x, 0);
5766 /* Only some unspecs are valid as "constants". */
5767 if (GET_CODE (inner) == UNSPEC)
5768 switch (XINT (inner, 1))
5771 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5779 return legitimate_pic_address_disp_p (x);
5786 /* Determine if a given CONST RTX is a valid memory displacement
5790 legitimate_pic_address_disp_p (rtx disp)
5794 /* In 64bit mode we can allow direct addresses of symbols and labels
5795 when they are not dynamic symbols. */
5798 /* TLS references should always be enclosed in UNSPEC. */
5799 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5801 if (GET_CODE (disp) == SYMBOL_REF
5802 && ix86_cmodel == CM_SMALL_PIC
5803 && SYMBOL_REF_LOCAL_P (disp))
5805 if (GET_CODE (disp) == LABEL_REF)
5807 if (GET_CODE (disp) == CONST
5808 && GET_CODE (XEXP (disp, 0)) == PLUS)
5810 rtx op0 = XEXP (XEXP (disp, 0), 0);
5811 rtx op1 = XEXP (XEXP (disp, 0), 1);
5813 /* TLS references should always be enclosed in UNSPEC. */
5814 if (tls_symbolic_operand (op0, GET_MODE (op0)))
5816 if (((GET_CODE (op0) == SYMBOL_REF
5817 && ix86_cmodel == CM_SMALL_PIC
5818 && SYMBOL_REF_LOCAL_P (op0))
5819 || GET_CODE (op0) == LABEL_REF)
5820 && GET_CODE (op1) == CONST_INT
5821 && INTVAL (op1) < 16*1024*1024
5822 && INTVAL (op1) >= -16*1024*1024)
5826 if (GET_CODE (disp) != CONST)
5828 disp = XEXP (disp, 0);
5832 /* We are unsafe to allow PLUS expressions. This limit allowed distance
5833 of GOT tables. We should not need these anyway. */
5834 if (GET_CODE (disp) != UNSPEC
5835 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5838 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5839 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5845 if (GET_CODE (disp) == PLUS)
5847 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5849 disp = XEXP (disp, 0);
5853 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5854 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5856 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5857 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5858 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5860 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5861 if (! strcmp (sym_name, "<pic base>"))
5866 if (GET_CODE (disp) != UNSPEC)
5869 switch (XINT (disp, 1))
5874 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5876 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
5877 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
5878 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5880 case UNSPEC_GOTTPOFF:
5881 case UNSPEC_GOTNTPOFF:
5882 case UNSPEC_INDNTPOFF:
5885 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5887 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5889 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5895 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5896 memory address for an instruction. The MODE argument is the machine mode
5897 for the MEM expression that wants to use this address.
5899 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
5900 convert common non-canonical forms to canonical form so that they will
5904 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
5906 struct ix86_address parts;
5907 rtx base, index, disp;
5908 HOST_WIDE_INT scale;
5909 const char *reason = NULL;
5910 rtx reason_rtx = NULL_RTX;
5912 if (TARGET_DEBUG_ADDR)
5915 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5916 GET_MODE_NAME (mode), strict);
5920 if (ix86_decompose_address (addr, &parts) <= 0)
5922 reason = "decomposition failed";
5927 index = parts.index;
5929 scale = parts.scale;
5931 /* Validate base register.
5933 Don't allow SUBREG's here, it can lead to spill failures when the base
5934 is one word out of a two word structure, which is represented internally
5942 if (GET_CODE (base) == SUBREG)
5943 reg = SUBREG_REG (base);
5947 if (GET_CODE (reg) != REG)
5949 reason = "base is not a register";
5953 if (GET_MODE (base) != Pmode)
5955 reason = "base is not in Pmode";
5959 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5960 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5962 reason = "base is not valid";
5967 /* Validate index register.
5969 Don't allow SUBREG's here, it can lead to spill failures when the index
5970 is one word out of a two word structure, which is represented internally
5978 if (GET_CODE (index) == SUBREG)
5979 reg = SUBREG_REG (index);
5983 if (GET_CODE (reg) != REG)
5985 reason = "index is not a register";
5989 if (GET_MODE (index) != Pmode)
5991 reason = "index is not in Pmode";
5995 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5996 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5998 reason = "index is not valid";
6003 /* Validate scale factor. */
6006 reason_rtx = GEN_INT (scale);
6009 reason = "scale without index";
6013 if (scale != 2 && scale != 4 && scale != 8)
6015 reason = "scale is not a valid multiplier";
6020 /* Validate displacement. */
6025 if (GET_CODE (disp) == CONST
6026 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6027 switch (XINT (XEXP (disp, 0), 1))
6031 case UNSPEC_GOTPCREL:
6034 goto is_legitimate_pic;
6036 case UNSPEC_GOTTPOFF:
6037 case UNSPEC_GOTNTPOFF:
6038 case UNSPEC_INDNTPOFF:
6044 reason = "invalid address unspec";
6048 else if (flag_pic && (SYMBOLIC_CONST (disp)
6050 && !machopic_operand_p (disp)
6055 if (TARGET_64BIT && (index || base))
6057 /* foo@dtpoff(%rX) is ok. */
6058 if (GET_CODE (disp) != CONST
6059 || GET_CODE (XEXP (disp, 0)) != PLUS
6060 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6061 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6062 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6063 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6065 reason = "non-constant pic memory reference";
6069 else if (! legitimate_pic_address_disp_p (disp))
6071 reason = "displacement is an invalid pic construct";
6075 /* This code used to verify that a symbolic pic displacement
6076 includes the pic_offset_table_rtx register.
6078 While this is good idea, unfortunately these constructs may
6079 be created by "adds using lea" optimization for incorrect
6088 This code is nonsensical, but results in addressing
6089 GOT table with pic_offset_table_rtx base. We can't
6090 just refuse it easily, since it gets matched by
6091 "addsi3" pattern, that later gets split to lea in the
6092 case output register differs from input. While this
6093 can be handled by separate addsi pattern for this case
6094 that never results in lea, this seems to be easier and
6095 correct fix for crash to disable this test. */
6097 else if (GET_CODE (disp) != LABEL_REF
6098 && GET_CODE (disp) != CONST_INT
6099 && (GET_CODE (disp) != CONST
6100 || !legitimate_constant_p (disp))
6101 && (GET_CODE (disp) != SYMBOL_REF
6102 || !legitimate_constant_p (disp)))
6104 reason = "displacement is not constant";
6107 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
6109 reason = "displacement is out of range";
6114 /* Everything looks valid. */
6115 if (TARGET_DEBUG_ADDR)
6116 fprintf (stderr, "Success.\n");
6120 if (TARGET_DEBUG_ADDR)
6122 fprintf (stderr, "Error: %s\n", reason);
6123 debug_rtx (reason_rtx);
6128 /* Return an unique alias set for the GOT. */
6130 static HOST_WIDE_INT
6131 ix86_GOT_alias_set (void)
6133 static HOST_WIDE_INT set = -1;
6135 set = new_alias_set ();
6139 /* Return a legitimate reference for ORIG (an address) using the
6140 register REG. If REG is 0, a new pseudo is generated.
6142 There are two types of references that must be handled:
6144 1. Global data references must load the address from the GOT, via
6145 the PIC reg. An insn is emitted to do this load, and the reg is
6148 2. Static data references, constant pool addresses, and code labels
6149 compute the address as an offset from the GOT, whose base is in
6150 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6151 differentiate them from global data objects. The returned
6152 address is the PIC reg + an unspec constant.
6154 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6155 reg also appears in the address. */
6158 legitimize_pic_address (rtx orig, rtx reg)
6166 reg = gen_reg_rtx (Pmode);
6167 /* Use the generic Mach-O PIC machinery. */
6168 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6171 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6173 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6175 /* This symbol may be referenced via a displacement from the PIC
6176 base address (@GOTOFF). */
6178 if (reload_in_progress)
6179 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6180 if (GET_CODE (addr) == CONST)
6181 addr = XEXP (addr, 0);
6182 if (GET_CODE (addr) == PLUS)
6184 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6185 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6188 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6189 new = gen_rtx_CONST (Pmode, new);
6190 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6194 emit_move_insn (reg, new);
6198 else if (GET_CODE (addr) == SYMBOL_REF)
6202 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6203 new = gen_rtx_CONST (Pmode, new);
6204 new = gen_rtx_MEM (Pmode, new);
6205 RTX_UNCHANGING_P (new) = 1;
6206 set_mem_alias_set (new, ix86_GOT_alias_set ());
6209 reg = gen_reg_rtx (Pmode);
6210 /* Use directly gen_movsi, otherwise the address is loaded
6211 into register for CSE. We don't want to CSE this addresses,
6212 instead we CSE addresses from the GOT table, so skip this. */
6213 emit_insn (gen_movsi (reg, new));
6218 /* This symbol must be referenced via a load from the
6219 Global Offset Table (@GOT). */
6221 if (reload_in_progress)
6222 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6223 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6224 new = gen_rtx_CONST (Pmode, new);
6225 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6226 new = gen_rtx_MEM (Pmode, new);
6227 RTX_UNCHANGING_P (new) = 1;
6228 set_mem_alias_set (new, ix86_GOT_alias_set ());
6231 reg = gen_reg_rtx (Pmode);
6232 emit_move_insn (reg, new);
6238 if (GET_CODE (addr) == CONST)
6240 addr = XEXP (addr, 0);
6242 /* We must match stuff we generate before. Assume the only
6243 unspecs that can get here are ours. Not that we could do
6244 anything with them anyway... */
6245 if (GET_CODE (addr) == UNSPEC
6246 || (GET_CODE (addr) == PLUS
6247 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6249 if (GET_CODE (addr) != PLUS)
6252 if (GET_CODE (addr) == PLUS)
6254 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6256 /* Check first to see if this is a constant offset from a @GOTOFF
6257 symbol reference. */
6258 if (local_symbolic_operand (op0, Pmode)
6259 && GET_CODE (op1) == CONST_INT)
6263 if (reload_in_progress)
6264 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6265 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6267 new = gen_rtx_PLUS (Pmode, new, op1);
6268 new = gen_rtx_CONST (Pmode, new);
6269 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6273 emit_move_insn (reg, new);
6279 if (INTVAL (op1) < -16*1024*1024
6280 || INTVAL (op1) >= 16*1024*1024)
6281 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
6286 base = legitimize_pic_address (XEXP (addr, 0), reg);
6287 new = legitimize_pic_address (XEXP (addr, 1),
6288 base == reg ? NULL_RTX : reg);
6290 if (GET_CODE (new) == CONST_INT)
6291 new = plus_constant (base, INTVAL (new));
6294 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6296 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6297 new = XEXP (new, 1);
6299 new = gen_rtx_PLUS (Pmode, base, new);
6307 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6310 get_thread_pointer (int to_reg)
6314 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6318 reg = gen_reg_rtx (Pmode);
6319 insn = gen_rtx_SET (VOIDmode, reg, tp);
6320 insn = emit_insn (insn);
6325 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6326 false if we expect this to be used for a memory address and true if
6327 we expect to load the address into a register. */
6330 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6332 rtx dest, base, off, pic;
6337 case TLS_MODEL_GLOBAL_DYNAMIC:
6338 dest = gen_reg_rtx (Pmode);
6341 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6344 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6345 insns = get_insns ();
6348 emit_libcall_block (insns, dest, rax, x);
6351 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6354 case TLS_MODEL_LOCAL_DYNAMIC:
6355 base = gen_reg_rtx (Pmode);
6358 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6361 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6362 insns = get_insns ();
6365 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6366 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6367 emit_libcall_block (insns, base, rax, note);
6370 emit_insn (gen_tls_local_dynamic_base_32 (base));
6372 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6373 off = gen_rtx_CONST (Pmode, off);
6375 return gen_rtx_PLUS (Pmode, base, off);
6377 case TLS_MODEL_INITIAL_EXEC:
6381 type = UNSPEC_GOTNTPOFF;
6385 if (reload_in_progress)
6386 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6387 pic = pic_offset_table_rtx;
6388 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6390 else if (!TARGET_GNU_TLS)
6392 pic = gen_reg_rtx (Pmode);
6393 emit_insn (gen_set_got (pic));
6394 type = UNSPEC_GOTTPOFF;
6399 type = UNSPEC_INDNTPOFF;
6402 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6403 off = gen_rtx_CONST (Pmode, off);
6405 off = gen_rtx_PLUS (Pmode, pic, off);
6406 off = gen_rtx_MEM (Pmode, off);
6407 RTX_UNCHANGING_P (off) = 1;
6408 set_mem_alias_set (off, ix86_GOT_alias_set ());
6410 if (TARGET_64BIT || TARGET_GNU_TLS)
6412 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6413 off = force_reg (Pmode, off);
6414 return gen_rtx_PLUS (Pmode, base, off);
6418 base = get_thread_pointer (true);
6419 dest = gen_reg_rtx (Pmode);
6420 emit_insn (gen_subsi3 (dest, base, off));
6424 case TLS_MODEL_LOCAL_EXEC:
6425 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6426 (TARGET_64BIT || TARGET_GNU_TLS)
6427 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6428 off = gen_rtx_CONST (Pmode, off);
6430 if (TARGET_64BIT || TARGET_GNU_TLS)
6432 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6433 return gen_rtx_PLUS (Pmode, base, off);
6437 base = get_thread_pointer (true);
6438 dest = gen_reg_rtx (Pmode);
6439 emit_insn (gen_subsi3 (dest, base, off));
6450 /* Try machine-dependent ways of modifying an illegitimate address
6451 to be legitimate. If we find one, return the new, valid address.
6452 This macro is used in only one place: `memory_address' in explow.c.
6454 OLDX is the address as it was before break_out_memory_refs was called.
6455 In some cases it is useful to look at this to decide what needs to be done.
6457 MODE and WIN are passed so that this macro can use
6458 GO_IF_LEGITIMATE_ADDRESS.
6460 It is always safe for this macro to do nothing. It exists to recognize
6461 opportunities to optimize the output.
6463 For the 80386, we handle X+REG by loading X into a register R and
6464 using R+REG. R will go in a general reg and indexing will be used.
6465 However, if REG is a broken-out memory address or multiplication,
6466 nothing needs to be done because REG can certainly go in a general reg.
6468 When -fpic is used, special handling is needed for symbolic references.
6469 See comments by legitimize_pic_address in i386.c for details. */
6472 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
6477 if (TARGET_DEBUG_ADDR)
6479 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6480 GET_MODE_NAME (mode));
6484 log = tls_symbolic_operand (x, mode);
6486 return legitimize_tls_address (x, log, false);
6488 if (flag_pic && SYMBOLIC_CONST (x))
6489 return legitimize_pic_address (x, 0);
6491 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6492 if (GET_CODE (x) == ASHIFT
6493 && GET_CODE (XEXP (x, 1)) == CONST_INT
6494 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6497 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6498 GEN_INT (1 << log));
6501 if (GET_CODE (x) == PLUS)
6503 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6505 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6506 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6507 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6510 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6511 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6512 GEN_INT (1 << log));
6515 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6516 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6517 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6520 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6521 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6522 GEN_INT (1 << log));
6525 /* Put multiply first if it isn't already. */
6526 if (GET_CODE (XEXP (x, 1)) == MULT)
6528 rtx tmp = XEXP (x, 0);
6529 XEXP (x, 0) = XEXP (x, 1);
6534 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6535 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6536 created by virtual register instantiation, register elimination, and
6537 similar optimizations. */
6538 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6541 x = gen_rtx_PLUS (Pmode,
6542 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6543 XEXP (XEXP (x, 1), 0)),
6544 XEXP (XEXP (x, 1), 1));
6548 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6549 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6550 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6551 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6552 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6553 && CONSTANT_P (XEXP (x, 1)))
6556 rtx other = NULL_RTX;
6558 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6560 constant = XEXP (x, 1);
6561 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6563 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6565 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6566 other = XEXP (x, 1);
6574 x = gen_rtx_PLUS (Pmode,
6575 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6576 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6577 plus_constant (other, INTVAL (constant)));
6581 if (changed && legitimate_address_p (mode, x, FALSE))
6584 if (GET_CODE (XEXP (x, 0)) == MULT)
6587 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6590 if (GET_CODE (XEXP (x, 1)) == MULT)
6593 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6597 && GET_CODE (XEXP (x, 1)) == REG
6598 && GET_CODE (XEXP (x, 0)) == REG)
6601 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6604 x = legitimize_pic_address (x, 0);
6607 if (changed && legitimate_address_p (mode, x, FALSE))
6610 if (GET_CODE (XEXP (x, 0)) == REG)
6612 rtx temp = gen_reg_rtx (Pmode);
6613 rtx val = force_operand (XEXP (x, 1), temp);
6615 emit_move_insn (temp, val);
6621 else if (GET_CODE (XEXP (x, 1)) == REG)
6623 rtx temp = gen_reg_rtx (Pmode);
6624 rtx val = force_operand (XEXP (x, 0), temp);
6626 emit_move_insn (temp, val);
6636 /* Print an integer constant expression in assembler syntax. Addition
6637 and subtraction are the only arithmetic that may appear in these
6638 expressions. FILE is the stdio stream to write to, X is the rtx, and
6639 CODE is the operand print code from the output string. */
6642 output_pic_addr_const (FILE *file, rtx x, int code)
6646 switch (GET_CODE (x))
6656 assemble_name (file, XSTR (x, 0));
6657 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
6658 fputs ("@PLT", file);
6665 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6666 assemble_name (asm_out_file, buf);
6670 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6674 /* This used to output parentheses around the expression,
6675 but that does not work on the 386 (either ATT or BSD assembler). */
6676 output_pic_addr_const (file, XEXP (x, 0), code);
6680 if (GET_MODE (x) == VOIDmode)
6682 /* We can use %d if the number is <32 bits and positive. */
6683 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6684 fprintf (file, "0x%lx%08lx",
6685 (unsigned long) CONST_DOUBLE_HIGH (x),
6686 (unsigned long) CONST_DOUBLE_LOW (x));
6688 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6691 /* We can't handle floating point constants;
6692 PRINT_OPERAND must handle them. */
6693 output_operand_lossage ("floating constant misused");
6697 /* Some assemblers need integer constants to appear first. */
6698 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6700 output_pic_addr_const (file, XEXP (x, 0), code);
6702 output_pic_addr_const (file, XEXP (x, 1), code);
6704 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6706 output_pic_addr_const (file, XEXP (x, 1), code);
6708 output_pic_addr_const (file, XEXP (x, 0), code);
6716 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6717 output_pic_addr_const (file, XEXP (x, 0), code);
6719 output_pic_addr_const (file, XEXP (x, 1), code);
6721 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6725 if (XVECLEN (x, 0) != 1)
6727 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6728 switch (XINT (x, 1))
6731 fputs ("@GOT", file);
6734 fputs ("@GOTOFF", file);
6736 case UNSPEC_GOTPCREL:
6737 fputs ("@GOTPCREL(%rip)", file);
6739 case UNSPEC_GOTTPOFF:
6740 /* FIXME: This might be @TPOFF in Sun ld too. */
6741 fputs ("@GOTTPOFF", file);
6744 fputs ("@TPOFF", file);
6748 fputs ("@TPOFF", file);
6750 fputs ("@NTPOFF", file);
6753 fputs ("@DTPOFF", file);
6755 case UNSPEC_GOTNTPOFF:
6757 fputs ("@GOTTPOFF(%rip)", file);
6759 fputs ("@GOTNTPOFF", file);
6761 case UNSPEC_INDNTPOFF:
6762 fputs ("@INDNTPOFF", file);
6765 output_operand_lossage ("invalid UNSPEC as operand");
6771 output_operand_lossage ("invalid expression as operand");
6775 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6776 We need to handle our special PIC relocations. */
6779 i386_dwarf_output_addr_const (FILE *file, rtx x)
6782 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6786 fprintf (file, "%s", ASM_LONG);
6789 output_pic_addr_const (file, x, '\0');
6791 output_addr_const (file, x);
6795 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6796 We need to emit DTP-relative relocations. */
6799 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
6801 fputs (ASM_LONG, file);
6802 output_addr_const (file, x);
6803 fputs ("@DTPOFF", file);
6809 fputs (", 0", file);
6816 /* In the name of slightly smaller debug output, and to cater to
6817 general assembler losage, recognize PIC+GOTOFF and turn it back
6818 into a direct symbol reference. */
6821 ix86_delegitimize_address (rtx orig_x)
6825 if (GET_CODE (x) == MEM)
6830 if (GET_CODE (x) != CONST
6831 || GET_CODE (XEXP (x, 0)) != UNSPEC
6832 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6833 || GET_CODE (orig_x) != MEM)
6835 return XVECEXP (XEXP (x, 0), 0, 0);
6838 if (GET_CODE (x) != PLUS
6839 || GET_CODE (XEXP (x, 1)) != CONST)
6842 if (GET_CODE (XEXP (x, 0)) == REG
6843 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6844 /* %ebx + GOT/GOTOFF */
6846 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6848 /* %ebx + %reg * scale + GOT/GOTOFF */
6850 if (GET_CODE (XEXP (y, 0)) == REG
6851 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6853 else if (GET_CODE (XEXP (y, 1)) == REG
6854 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6858 if (GET_CODE (y) != REG
6859 && GET_CODE (y) != MULT
6860 && GET_CODE (y) != ASHIFT)
6866 x = XEXP (XEXP (x, 1), 0);
6867 if (GET_CODE (x) == UNSPEC
6868 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6869 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6872 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6873 return XVECEXP (x, 0, 0);
6876 if (GET_CODE (x) == PLUS
6877 && GET_CODE (XEXP (x, 0)) == UNSPEC
6878 && GET_CODE (XEXP (x, 1)) == CONST_INT
6879 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6880 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6881 && GET_CODE (orig_x) != MEM)))
6883 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6885 return gen_rtx_PLUS (Pmode, y, x);
6893 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
6898 if (mode == CCFPmode || mode == CCFPUmode)
6900 enum rtx_code second_code, bypass_code;
6901 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6902 if (bypass_code != NIL || second_code != NIL)
6904 code = ix86_fp_compare_code_to_integer (code);
6908 code = reverse_condition (code);
6919 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6924 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
6925 Those same assemblers have the same but opposite losage on cmov. */
6928 suffix = fp ? "nbe" : "a";
6931 if (mode == CCNOmode || mode == CCGOCmode)
6933 else if (mode == CCmode || mode == CCGCmode)
6944 if (mode == CCNOmode || mode == CCGOCmode)
6946 else if (mode == CCmode || mode == CCGCmode)
6955 suffix = fp ? "nb" : "ae";
6958 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6968 suffix = fp ? "u" : "p";
6971 suffix = fp ? "nu" : "np";
6976 fputs (suffix, file);
6980 print_reg (rtx x, int code, FILE *file)
6982 /* Code -1 indicates we are called from print_rtx, and it is not
6983 an error for a virtual register to appear here. */
6986 else if (REGNO (x) == ARG_POINTER_REGNUM
6987 || REGNO (x) == FRAME_POINTER_REGNUM
6988 || REGNO (x) == FLAGS_REG
6989 || REGNO (x) == FPSR_REG)
6992 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6995 if (code == 'w' || MMX_REG_P (x))
6997 else if (code == 'b')
6999 else if (code == 'k')
7001 else if (code == 'q')
7003 else if (code == 'y')
7005 else if (code == 'h')
7008 code = GET_MODE_SIZE (GET_MODE (x));
7010 /* Irritatingly, AMD extended registers use different naming convention
7011 from the normal registers. */
7012 if (REX_INT_REG_P (x))
7019 error ("extended registers have no high halves");
7022 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7025 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7028 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7031 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7034 error ("unsupported operand size for extended register");
7042 if (STACK_TOP_P (x))
7044 fputs ("st(0)", file);
7051 if (! ANY_FP_REG_P (x))
7052 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7057 fputs (hi_reg_name[REGNO (x)], file);
7060 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7062 fputs (qi_reg_name[REGNO (x)], file);
7065 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7067 fputs (qi_high_reg_name[REGNO (x)], file);
7074 /* Locate some local-dynamic symbol still in use by this function
7075 so that we can print its name in some tls_local_dynamic_base
7079 get_some_local_dynamic_name (void)
7083 if (cfun->machine->some_ld_name)
7084 return cfun->machine->some_ld_name;
7086 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7088 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7089 return cfun->machine->some_ld_name;
7095 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7099 if (GET_CODE (x) == SYMBOL_REF
7100 && local_dynamic_symbolic_operand (x, Pmode))
7102 cfun->machine->some_ld_name = XSTR (x, 0);
7110 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7111 C -- print opcode suffix for set/cmov insn.
7112 c -- like C, but print reversed condition
7113 F,f -- likewise, but for floating-point.
7114 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7116 R -- print the prefix for register names.
7117 z -- print the opcode suffix for the size of the current operand.
7118 * -- print a star (in certain assembler syntax)
7119 A -- print an absolute memory reference.
7120 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7121 s -- print a shift double count, followed by the assemblers argument
7123 b -- print the QImode name of the register for the indicated operand.
7124 %b0 would print %al if operands[0] is reg 0.
7125 w -- likewise, print the HImode name of the register.
7126 k -- likewise, print the SImode name of the register.
7127 q -- likewise, print the DImode name of the register.
7128 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7129 y -- print "st(0)" instead of "st" as a register.
7130 D -- print condition for SSE cmp instruction.
7131 P -- if PIC, print an @PLT suffix.
7132 X -- don't print any sort of PIC '@' suffix for a symbol.
7133 & -- print some in-use local-dynamic symbol name.
7137 print_operand (FILE *file, rtx x, int code)
7144 if (ASSEMBLER_DIALECT == ASM_ATT)
7149 assemble_name (file, get_some_local_dynamic_name ());
7153 if (ASSEMBLER_DIALECT == ASM_ATT)
7155 else if (ASSEMBLER_DIALECT == ASM_INTEL)
7157 /* Intel syntax. For absolute addresses, registers should not
7158 be surrounded by braces. */
7159 if (GET_CODE (x) != REG)
7162 PRINT_OPERAND (file, x, 0);
7170 PRINT_OPERAND (file, x, 0);
7175 if (ASSEMBLER_DIALECT == ASM_ATT)
7180 if (ASSEMBLER_DIALECT == ASM_ATT)
7185 if (ASSEMBLER_DIALECT == ASM_ATT)
7190 if (ASSEMBLER_DIALECT == ASM_ATT)
7195 if (ASSEMBLER_DIALECT == ASM_ATT)
7200 if (ASSEMBLER_DIALECT == ASM_ATT)
7205 /* 387 opcodes don't get size suffixes if the operands are
7207 if (STACK_REG_P (x))
7210 /* Likewise if using Intel opcodes. */
7211 if (ASSEMBLER_DIALECT == ASM_INTEL)
7214 /* This is the size of op from size of operand. */
7215 switch (GET_MODE_SIZE (GET_MODE (x)))
7218 #ifdef HAVE_GAS_FILDS_FISTS
7224 if (GET_MODE (x) == SFmode)
7239 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7241 #ifdef GAS_MNEMONICS
7267 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7269 PRINT_OPERAND (file, x, 0);
7275 /* Little bit of braindamage here. The SSE compare instructions
7276 does use completely different names for the comparisons that the
7277 fp conditional moves. */
7278 switch (GET_CODE (x))
7293 fputs ("unord", file);
7297 fputs ("neq", file);
7301 fputs ("nlt", file);
7305 fputs ("nle", file);
7308 fputs ("ord", file);
7316 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7317 if (ASSEMBLER_DIALECT == ASM_ATT)
7319 switch (GET_MODE (x))
7321 case HImode: putc ('w', file); break;
7323 case SFmode: putc ('l', file); break;
7325 case DFmode: putc ('q', file); break;
7333 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7336 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7337 if (ASSEMBLER_DIALECT == ASM_ATT)
7340 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7343 /* Like above, but reverse condition */
7345 /* Check to see if argument to %c is really a constant
7346 and not a condition code which needs to be reversed. */
7347 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
7349 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7352 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7355 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7356 if (ASSEMBLER_DIALECT == ASM_ATT)
7359 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7365 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7368 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7371 int pred_val = INTVAL (XEXP (x, 0));
7373 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7374 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7376 int taken = pred_val > REG_BR_PROB_BASE / 2;
7377 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7379 /* Emit hints only in the case default branch prediction
7380 heuristics would fail. */
7381 if (taken != cputaken)
7383 /* We use 3e (DS) prefix for taken branches and
7384 2e (CS) prefix for not taken branches. */
7386 fputs ("ds ; ", file);
7388 fputs ("cs ; ", file);
7395 output_operand_lossage ("invalid operand code `%c'", code);
7399 if (GET_CODE (x) == REG)
7401 PRINT_REG (x, code, file);
7404 else if (GET_CODE (x) == MEM)
7406 /* No `byte ptr' prefix for call instructions. */
7407 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7410 switch (GET_MODE_SIZE (GET_MODE (x)))
7412 case 1: size = "BYTE"; break;
7413 case 2: size = "WORD"; break;
7414 case 4: size = "DWORD"; break;
7415 case 8: size = "QWORD"; break;
7416 case 12: size = "XWORD"; break;
7417 case 16: size = "XMMWORD"; break;
7422 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7425 else if (code == 'w')
7427 else if (code == 'k')
7431 fputs (" PTR ", file);
7435 /* Avoid (%rip) for call operands. */
7436 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7437 && GET_CODE (x) != CONST_INT)
7438 output_addr_const (file, x);
7439 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7440 output_operand_lossage ("invalid constraints for operand");
7445 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7450 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7451 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7453 if (ASSEMBLER_DIALECT == ASM_ATT)
7455 fprintf (file, "0x%08lx", l);
7458 /* These float cases don't actually occur as immediate operands. */
7459 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7463 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7464 fprintf (file, "%s", dstr);
7467 else if (GET_CODE (x) == CONST_DOUBLE
7468 && GET_MODE (x) == XFmode)
7472 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7473 fprintf (file, "%s", dstr);
7480 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7482 if (ASSEMBLER_DIALECT == ASM_ATT)
7485 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7486 || GET_CODE (x) == LABEL_REF)
7488 if (ASSEMBLER_DIALECT == ASM_ATT)
7491 fputs ("OFFSET FLAT:", file);
7494 if (GET_CODE (x) == CONST_INT)
7495 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7497 output_pic_addr_const (file, x, code);
7499 output_addr_const (file, x);
7503 /* Print a memory operand whose address is ADDR. */
7506 print_operand_address (FILE *file, rtx addr)
7508 struct ix86_address parts;
7509 rtx base, index, disp;
7512 if (! ix86_decompose_address (addr, &parts))
7516 index = parts.index;
7518 scale = parts.scale;
7526 if (USER_LABEL_PREFIX[0] == 0)
7528 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
7534 if (!base && !index)
7536 /* Displacement only requires special attention. */
7538 if (GET_CODE (disp) == CONST_INT)
7540 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
7542 if (USER_LABEL_PREFIX[0] == 0)
7544 fputs ("ds:", file);
7546 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
7549 output_pic_addr_const (file, disp, 0);
7551 output_addr_const (file, disp);
7553 /* Use one byte shorter RIP relative addressing for 64bit mode. */
7555 && ((GET_CODE (disp) == SYMBOL_REF
7556 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
7557 || GET_CODE (disp) == LABEL_REF
7558 || (GET_CODE (disp) == CONST
7559 && GET_CODE (XEXP (disp, 0)) == PLUS
7560 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
7561 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
7562 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
7563 fputs ("(%rip)", file);
7567 if (ASSEMBLER_DIALECT == ASM_ATT)
7572 output_pic_addr_const (file, disp, 0);
7573 else if (GET_CODE (disp) == LABEL_REF)
7574 output_asm_label (disp);
7576 output_addr_const (file, disp);
7581 PRINT_REG (base, 0, file);
7585 PRINT_REG (index, 0, file);
7587 fprintf (file, ",%d", scale);
7593 rtx offset = NULL_RTX;
7597 /* Pull out the offset of a symbol; print any symbol itself. */
7598 if (GET_CODE (disp) == CONST
7599 && GET_CODE (XEXP (disp, 0)) == PLUS
7600 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7602 offset = XEXP (XEXP (disp, 0), 1);
7603 disp = gen_rtx_CONST (VOIDmode,
7604 XEXP (XEXP (disp, 0), 0));
7608 output_pic_addr_const (file, disp, 0);
7609 else if (GET_CODE (disp) == LABEL_REF)
7610 output_asm_label (disp);
7611 else if (GET_CODE (disp) == CONST_INT)
7614 output_addr_const (file, disp);
7620 PRINT_REG (base, 0, file);
7623 if (INTVAL (offset) >= 0)
7625 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7629 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7636 PRINT_REG (index, 0, file);
7638 fprintf (file, "*%d", scale);
7646 output_addr_const_extra (FILE *file, rtx x)
7650 if (GET_CODE (x) != UNSPEC)
7653 op = XVECEXP (x, 0, 0);
7654 switch (XINT (x, 1))
7656 case UNSPEC_GOTTPOFF:
7657 output_addr_const (file, op);
7658 /* FIXME: This might be @TPOFF in Sun ld. */
7659 fputs ("@GOTTPOFF", file);
7662 output_addr_const (file, op);
7663 fputs ("@TPOFF", file);
7666 output_addr_const (file, op);
7668 fputs ("@TPOFF", file);
7670 fputs ("@NTPOFF", file);
7673 output_addr_const (file, op);
7674 fputs ("@DTPOFF", file);
7676 case UNSPEC_GOTNTPOFF:
7677 output_addr_const (file, op);
7679 fputs ("@GOTTPOFF(%rip)", file);
7681 fputs ("@GOTNTPOFF", file);
7683 case UNSPEC_INDNTPOFF:
7684 output_addr_const (file, op);
7685 fputs ("@INDNTPOFF", file);
7695 /* Split one or more DImode RTL references into pairs of SImode
7696 references. The RTL can be REG, offsettable MEM, integer constant, or
7697 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7698 split and "num" is its length. lo_half and hi_half are output arrays
7699 that parallel "operands". */
7702 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7706 rtx op = operands[num];
7708 /* simplify_subreg refuse to split volatile memory addresses,
7709 but we still have to handle it. */
7710 if (GET_CODE (op) == MEM)
7712 lo_half[num] = adjust_address (op, SImode, 0);
7713 hi_half[num] = adjust_address (op, SImode, 4);
7717 lo_half[num] = simplify_gen_subreg (SImode, op,
7718 GET_MODE (op) == VOIDmode
7719 ? DImode : GET_MODE (op), 0);
7720 hi_half[num] = simplify_gen_subreg (SImode, op,
7721 GET_MODE (op) == VOIDmode
7722 ? DImode : GET_MODE (op), 4);
7726 /* Split one or more TImode RTL references into pairs of SImode
7727 references. The RTL can be REG, offsettable MEM, integer constant, or
7728 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7729 split and "num" is its length. lo_half and hi_half are output arrays
7730 that parallel "operands". */
7733 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7737 rtx op = operands[num];
7739 /* simplify_subreg refuse to split volatile memory addresses, but we
7740 still have to handle it. */
7741 if (GET_CODE (op) == MEM)
7743 lo_half[num] = adjust_address (op, DImode, 0);
7744 hi_half[num] = adjust_address (op, DImode, 8);
7748 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7749 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7754 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7755 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7756 is the expression of the binary operation. The output may either be
7757 emitted here, or returned to the caller, like all output_* functions.
7759 There is no guarantee that the operands are the same mode, as they
7760 might be within FLOAT or FLOAT_EXTEND expressions. */
7762 #ifndef SYSV386_COMPAT
7763 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7764 wants to fix the assemblers because that causes incompatibility
7765 with gcc. No-one wants to fix gcc because that causes
7766 incompatibility with assemblers... You can use the option of
7767 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7768 #define SYSV386_COMPAT 1
7772 output_387_binary_op (rtx insn, rtx *operands)
7774 static char buf[30];
7777 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7779 #ifdef ENABLE_CHECKING
7780 /* Even if we do not want to check the inputs, this documents input
7781 constraints. Which helps in understanding the following code. */
7782 if (STACK_REG_P (operands[0])
7783 && ((REG_P (operands[1])
7784 && REGNO (operands[0]) == REGNO (operands[1])
7785 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7786 || (REG_P (operands[2])
7787 && REGNO (operands[0]) == REGNO (operands[2])
7788 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7789 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7795 switch (GET_CODE (operands[3]))
7798 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7799 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7807 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7808 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7816 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7817 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7825 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7826 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7840 if (GET_MODE (operands[0]) == SFmode)
7841 strcat (buf, "ss\t{%2, %0|%0, %2}");
7843 strcat (buf, "sd\t{%2, %0|%0, %2}");
7848 switch (GET_CODE (operands[3]))
7852 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7854 rtx temp = operands[2];
7855 operands[2] = operands[1];
7859 /* know operands[0] == operands[1]. */
7861 if (GET_CODE (operands[2]) == MEM)
7867 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7869 if (STACK_TOP_P (operands[0]))
7870 /* How is it that we are storing to a dead operand[2]?
7871 Well, presumably operands[1] is dead too. We can't
7872 store the result to st(0) as st(0) gets popped on this
7873 instruction. Instead store to operands[2] (which I
7874 think has to be st(1)). st(1) will be popped later.
7875 gcc <= 2.8.1 didn't have this check and generated
7876 assembly code that the Unixware assembler rejected. */
7877 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7879 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7883 if (STACK_TOP_P (operands[0]))
7884 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7886 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7891 if (GET_CODE (operands[1]) == MEM)
7897 if (GET_CODE (operands[2]) == MEM)
7903 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7906 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7907 derived assemblers, confusingly reverse the direction of
7908 the operation for fsub{r} and fdiv{r} when the
7909 destination register is not st(0). The Intel assembler
7910 doesn't have this brain damage. Read !SYSV386_COMPAT to
7911 figure out what the hardware really does. */
7912 if (STACK_TOP_P (operands[0]))
7913 p = "{p\t%0, %2|rp\t%2, %0}";
7915 p = "{rp\t%2, %0|p\t%0, %2}";
7917 if (STACK_TOP_P (operands[0]))
7918 /* As above for fmul/fadd, we can't store to st(0). */
7919 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7921 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7926 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7929 if (STACK_TOP_P (operands[0]))
7930 p = "{rp\t%0, %1|p\t%1, %0}";
7932 p = "{p\t%1, %0|rp\t%0, %1}";
7934 if (STACK_TOP_P (operands[0]))
7935 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7937 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7942 if (STACK_TOP_P (operands[0]))
7944 if (STACK_TOP_P (operands[1]))
7945 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7947 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7950 else if (STACK_TOP_P (operands[1]))
7953 p = "{\t%1, %0|r\t%0, %1}";
7955 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7961 p = "{r\t%2, %0|\t%0, %2}";
7963 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7976 /* Output code to initialize control word copies used by
7977 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
7978 is set to control word rounding downwards. */
7980 emit_i387_cw_initialization (rtx normal, rtx round_down)
7982 rtx reg = gen_reg_rtx (HImode);
7984 emit_insn (gen_x86_fnstcw_1 (normal));
7985 emit_move_insn (reg, normal);
7986 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7988 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7990 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7991 emit_move_insn (round_down, reg);
7994 /* Output code for INSN to convert a float to a signed int. OPERANDS
7995 are the insn operands. The output may be [HSD]Imode and the input
7996 operand may be [SDX]Fmode. */
7999 output_fix_trunc (rtx insn, rtx *operands)
8001 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8002 int dimode_p = GET_MODE (operands[0]) == DImode;
8004 /* Jump through a hoop or two for DImode, since the hardware has no
8005 non-popping instruction. We used to do this a different way, but
8006 that was somewhat fragile and broke with post-reload splitters. */
8007 if (dimode_p && !stack_top_dies)
8008 output_asm_insn ("fld\t%y1", operands);
8010 if (!STACK_TOP_P (operands[1]))
8013 if (GET_CODE (operands[0]) != MEM)
8016 output_asm_insn ("fldcw\t%3", operands);
8017 if (stack_top_dies || dimode_p)
8018 output_asm_insn ("fistp%z0\t%0", operands);
8020 output_asm_insn ("fist%z0\t%0", operands);
8021 output_asm_insn ("fldcw\t%2", operands);
8026 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8027 should be used and 2 when fnstsw should be used. UNORDERED_P is true
8028 when fucom should be used. */
8031 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8034 rtx cmp_op0 = operands[0];
8035 rtx cmp_op1 = operands[1];
8036 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
8041 cmp_op1 = operands[2];
8045 if (GET_MODE (operands[0]) == SFmode)
8047 return "ucomiss\t{%1, %0|%0, %1}";
8049 return "comiss\t{%1, %0|%0, %1}";
8052 return "ucomisd\t{%1, %0|%0, %1}";
8054 return "comisd\t{%1, %0|%0, %1}";
8057 if (! STACK_TOP_P (cmp_op0))
8060 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8062 if (STACK_REG_P (cmp_op1)
8064 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8065 && REGNO (cmp_op1) != FIRST_STACK_REG)
8067 /* If both the top of the 387 stack dies, and the other operand
8068 is also a stack register that dies, then this must be a
8069 `fcompp' float compare */
8073 /* There is no double popping fcomi variant. Fortunately,
8074 eflags is immune from the fstp's cc clobbering. */
8076 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8078 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8086 return "fucompp\n\tfnstsw\t%0";
8088 return "fcompp\n\tfnstsw\t%0";
8101 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8103 static const char * const alt[24] =
8115 "fcomi\t{%y1, %0|%0, %y1}",
8116 "fcomip\t{%y1, %0|%0, %y1}",
8117 "fucomi\t{%y1, %0|%0, %y1}",
8118 "fucomip\t{%y1, %0|%0, %y1}",
8125 "fcom%z2\t%y2\n\tfnstsw\t%0",
8126 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8127 "fucom%z2\t%y2\n\tfnstsw\t%0",
8128 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8130 "ficom%z2\t%y2\n\tfnstsw\t%0",
8131 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8139 mask = eflags_p << 3;
8140 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
8141 mask |= unordered_p << 1;
8142 mask |= stack_top_dies;
8155 ix86_output_addr_vec_elt (FILE *file, int value)
8157 const char *directive = ASM_LONG;
8162 directive = ASM_QUAD;
8168 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8172 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8175 fprintf (file, "%s%s%d-%s%d\n",
8176 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8177 else if (HAVE_AS_GOTOFF_IN_DATA)
8178 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8180 else if (TARGET_MACHO)
8182 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8183 machopic_output_function_base_name (file);
8184 fprintf(file, "\n");
8188 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8189 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8192 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8196 ix86_expand_clear (rtx dest)
8200 /* We play register width games, which are only valid after reload. */
8201 if (!reload_completed)
8204 /* Avoid HImode and its attendant prefix byte. */
8205 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8206 dest = gen_rtx_REG (SImode, REGNO (dest));
8208 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8210 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8211 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8213 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8214 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8220 /* X is an unchanging MEM. If it is a constant pool reference, return
8221 the constant pool rtx, else NULL. */
8224 maybe_get_pool_constant (rtx x)
8226 x = ix86_delegitimize_address (XEXP (x, 0));
8228 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8229 return get_pool_constant (x);
8235 ix86_expand_move (enum machine_mode mode, rtx operands[])
8237 int strict = (reload_in_progress || reload_completed);
8239 enum tls_model model;
8244 model = tls_symbolic_operand (op1, Pmode);
8247 op1 = legitimize_tls_address (op1, model, true);
8248 op1 = force_operand (op1, op0);
8253 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8258 rtx temp = ((reload_in_progress
8259 || ((op0 && GET_CODE (op0) == REG)
8261 ? op0 : gen_reg_rtx (Pmode));
8262 op1 = machopic_indirect_data_reference (op1, temp);
8263 op1 = machopic_legitimize_pic_address (op1, mode,
8264 temp == op1 ? 0 : temp);
8266 else if (MACHOPIC_INDIRECT)
8267 op1 = machopic_indirect_data_reference (op1, 0);
8271 if (GET_CODE (op0) == MEM)
8272 op1 = force_reg (Pmode, op1);
8276 if (GET_CODE (temp) != REG)
8277 temp = gen_reg_rtx (Pmode);
8278 temp = legitimize_pic_address (op1, temp);
8283 #endif /* TARGET_MACHO */
8287 if (GET_CODE (op0) == MEM
8288 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8289 || !push_operand (op0, mode))
8290 && GET_CODE (op1) == MEM)
8291 op1 = force_reg (mode, op1);
8293 if (push_operand (op0, mode)
8294 && ! general_no_elim_operand (op1, mode))
8295 op1 = copy_to_mode_reg (mode, op1);
8297 /* Force large constants in 64bit compilation into register
8298 to get them CSEed. */
8299 if (TARGET_64BIT && mode == DImode
8300 && immediate_operand (op1, mode)
8301 && !x86_64_zero_extended_value (op1)
8302 && !register_operand (op0, mode)
8303 && optimize && !reload_completed && !reload_in_progress)
8304 op1 = copy_to_mode_reg (mode, op1);
8306 if (FLOAT_MODE_P (mode))
8308 /* If we are loading a floating point constant to a register,
8309 force the value to memory now, since we'll get better code
8310 out the back end. */
8314 else if (GET_CODE (op1) == CONST_DOUBLE)
8316 op1 = validize_mem (force_const_mem (mode, op1));
8317 if (!register_operand (op0, mode))
8319 rtx temp = gen_reg_rtx (mode);
8320 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8321 emit_move_insn (op0, temp);
8328 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
8332 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
8334 /* Force constants other than zero into memory. We do not know how
8335 the instructions used to build constants modify the upper 64 bits
8336 of the register, once we have that information we may be able
8337 to handle some of them more efficiently. */
8338 if ((reload_in_progress | reload_completed) == 0
8339 && register_operand (operands[0], mode)
8340 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
8341 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
8343 /* Make operand1 a register if it isn't already. */
8345 && !register_operand (operands[0], mode)
8346 && !register_operand (operands[1], mode))
8348 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8349 emit_move_insn (operands[0], temp);
8353 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8356 /* Attempt to expand a binary operator. Make the expansion closer to the
8357 actual machine, then just general_operand, which will allow 3 separate
8358 memory references (one output, two input) in a single insn. */
8361 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
8364 int matching_memory;
8365 rtx src1, src2, dst, op, clob;
8371 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8372 if (GET_RTX_CLASS (code) == 'c'
8373 && (rtx_equal_p (dst, src2)
8374 || immediate_operand (src1, mode)))
8381 /* If the destination is memory, and we do not have matching source
8382 operands, do things in registers. */
8383 matching_memory = 0;
8384 if (GET_CODE (dst) == MEM)
8386 if (rtx_equal_p (dst, src1))
8387 matching_memory = 1;
8388 else if (GET_RTX_CLASS (code) == 'c'
8389 && rtx_equal_p (dst, src2))
8390 matching_memory = 2;
8392 dst = gen_reg_rtx (mode);
8395 /* Both source operands cannot be in memory. */
8396 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8398 if (matching_memory != 2)
8399 src2 = force_reg (mode, src2);
8401 src1 = force_reg (mode, src1);
8404 /* If the operation is not commutable, source 1 cannot be a constant
8405 or non-matching memory. */
8406 if ((CONSTANT_P (src1)
8407 || (!matching_memory && GET_CODE (src1) == MEM))
8408 && GET_RTX_CLASS (code) != 'c')
8409 src1 = force_reg (mode, src1);
8411 /* If optimizing, copy to regs to improve CSE */
8412 if (optimize && ! no_new_pseudos)
8414 if (GET_CODE (dst) == MEM)
8415 dst = gen_reg_rtx (mode);
8416 if (GET_CODE (src1) == MEM)
8417 src1 = force_reg (mode, src1);
8418 if (GET_CODE (src2) == MEM)
8419 src2 = force_reg (mode, src2);
8422 /* Emit the instruction. */
8424 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8425 if (reload_in_progress)
8427 /* Reload doesn't know about the flags register, and doesn't know that
8428 it doesn't want to clobber it. We can only do this with PLUS. */
8435 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8436 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8439 /* Fix up the destination if needed. */
8440 if (dst != operands[0])
8441 emit_move_insn (operands[0], dst);
8444 /* Return TRUE or FALSE depending on whether the binary operator meets the
8445 appropriate constraints. */
8448 ix86_binary_operator_ok (enum rtx_code code,
8449 enum machine_mode mode ATTRIBUTE_UNUSED,
8452 /* Both source operands cannot be in memory. */
8453 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8455 /* If the operation is not commutable, source 1 cannot be a constant. */
8456 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
8458 /* If the destination is memory, we must have a matching source operand. */
8459 if (GET_CODE (operands[0]) == MEM
8460 && ! (rtx_equal_p (operands[0], operands[1])
8461 || (GET_RTX_CLASS (code) == 'c'
8462 && rtx_equal_p (operands[0], operands[2]))))
8464 /* If the operation is not commutable and the source 1 is memory, we must
8465 have a matching destination. */
8466 if (GET_CODE (operands[1]) == MEM
8467 && GET_RTX_CLASS (code) != 'c'
8468 && ! rtx_equal_p (operands[0], operands[1]))
8473 /* Attempt to expand a unary operator. Make the expansion closer to the
8474 actual machine, then just general_operand, which will allow 2 separate
8475 memory references (one output, one input) in a single insn. */
8478 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
8481 int matching_memory;
8482 rtx src, dst, op, clob;
8487 /* If the destination is memory, and we do not have matching source
8488 operands, do things in registers. */
8489 matching_memory = 0;
8490 if (GET_CODE (dst) == MEM)
8492 if (rtx_equal_p (dst, src))
8493 matching_memory = 1;
8495 dst = gen_reg_rtx (mode);
8498 /* When source operand is memory, destination must match. */
8499 if (!matching_memory && GET_CODE (src) == MEM)
8500 src = force_reg (mode, src);
8502 /* If optimizing, copy to regs to improve CSE */
8503 if (optimize && ! no_new_pseudos)
8505 if (GET_CODE (dst) == MEM)
8506 dst = gen_reg_rtx (mode);
8507 if (GET_CODE (src) == MEM)
8508 src = force_reg (mode, src);
8511 /* Emit the instruction. */
8513 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8514 if (reload_in_progress || code == NOT)
8516 /* Reload doesn't know about the flags register, and doesn't know that
8517 it doesn't want to clobber it. */
8524 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8525 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8528 /* Fix up the destination if needed. */
8529 if (dst != operands[0])
8530 emit_move_insn (operands[0], dst);
8533 /* Return TRUE or FALSE depending on whether the unary operator meets the
8534 appropriate constraints. */
8537 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8538 enum machine_mode mode ATTRIBUTE_UNUSED,
8539 rtx operands[2] ATTRIBUTE_UNUSED)
8541 /* If one of operands is memory, source and destination must match. */
8542 if ((GET_CODE (operands[0]) == MEM
8543 || GET_CODE (operands[1]) == MEM)
8544 && ! rtx_equal_p (operands[0], operands[1]))
8549 /* Return TRUE or FALSE depending on whether the first SET in INSN
8550 has source and destination with matching CC modes, and that the
8551 CC mode is at least as constrained as REQ_MODE. */
8554 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
8557 enum machine_mode set_mode;
8559 set = PATTERN (insn);
8560 if (GET_CODE (set) == PARALLEL)
8561 set = XVECEXP (set, 0, 0);
8562 if (GET_CODE (set) != SET)
8564 if (GET_CODE (SET_SRC (set)) != COMPARE)
8567 set_mode = GET_MODE (SET_DEST (set));
8571 if (req_mode != CCNOmode
8572 && (req_mode != CCmode
8573 || XEXP (SET_SRC (set), 1) != const0_rtx))
8577 if (req_mode == CCGCmode)
8581 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8585 if (req_mode == CCZmode)
8595 return (GET_MODE (SET_SRC (set)) == set_mode);
8598 /* Generate insn patterns to do an integer compare of OPERANDS. */
8601 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
8603 enum machine_mode cmpmode;
8606 cmpmode = SELECT_CC_MODE (code, op0, op1);
8607 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8609 /* This is very simple, but making the interface the same as in the
8610 FP case makes the rest of the code easier. */
8611 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8612 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8614 /* Return the test that should be put into the flags user, i.e.
8615 the bcc, scc, or cmov instruction. */
8616 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8619 /* Figure out whether to use ordered or unordered fp comparisons.
8620 Return the appropriate mode to use. */
8623 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
8625 /* ??? In order to make all comparisons reversible, we do all comparisons
8626 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8627 all forms trapping and nontrapping comparisons, we can make inequality
8628 comparisons trapping again, since it results in better code when using
8629 FCOM based compares. */
8630 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8634 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
8636 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8637 return ix86_fp_compare_mode (code);
8640 /* Only zero flag is needed. */
8642 case NE: /* ZF!=0 */
8644 /* Codes needing carry flag. */
8645 case GEU: /* CF=0 */
8646 case GTU: /* CF=0 & ZF=0 */
8647 case LTU: /* CF=1 */
8648 case LEU: /* CF=1 | ZF=1 */
8650 /* Codes possibly doable only with sign flag when
8651 comparing against zero. */
8652 case GE: /* SF=OF or SF=0 */
8653 case LT: /* SF<>OF or SF=1 */
8654 if (op1 == const0_rtx)
8657 /* For other cases Carry flag is not required. */
8659 /* Codes doable only with sign flag when comparing
8660 against zero, but we miss jump instruction for it
8661 so we need to use relational tests against overflow
8662 that thus needs to be zero. */
8663 case GT: /* ZF=0 & SF=OF */
8664 case LE: /* ZF=1 | SF<>OF */
8665 if (op1 == const0_rtx)
8669 /* strcmp pattern do (use flags) and combine may ask us for proper
8678 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8681 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
8683 enum rtx_code swapped_code = swap_condition (code);
8684 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8685 || (ix86_fp_comparison_cost (swapped_code)
8686 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8689 /* Swap, force into registers, or otherwise massage the two operands
8690 to a fp comparison. The operands are updated in place; the new
8691 comparison code is returned. */
8693 static enum rtx_code
8694 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
8696 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8697 rtx op0 = *pop0, op1 = *pop1;
8698 enum machine_mode op_mode = GET_MODE (op0);
8699 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8701 /* All of the unordered compare instructions only work on registers.
8702 The same is true of the XFmode compare instructions. The same is
8703 true of the fcomi compare instructions. */
8706 && (fpcmp_mode == CCFPUmode
8707 || op_mode == XFmode
8708 || ix86_use_fcomi_compare (code)))
8710 op0 = force_reg (op_mode, op0);
8711 op1 = force_reg (op_mode, op1);
8715 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8716 things around if they appear profitable, otherwise force op0
8719 if (standard_80387_constant_p (op0) == 0
8720 || (GET_CODE (op0) == MEM
8721 && ! (standard_80387_constant_p (op1) == 0
8722 || GET_CODE (op1) == MEM)))
8725 tmp = op0, op0 = op1, op1 = tmp;
8726 code = swap_condition (code);
8729 if (GET_CODE (op0) != REG)
8730 op0 = force_reg (op_mode, op0);
8732 if (CONSTANT_P (op1))
8734 if (standard_80387_constant_p (op1))
8735 op1 = force_reg (op_mode, op1);
8737 op1 = validize_mem (force_const_mem (op_mode, op1));
8741 /* Try to rearrange the comparison to make it cheaper. */
8742 if (ix86_fp_comparison_cost (code)
8743 > ix86_fp_comparison_cost (swap_condition (code))
8744 && (GET_CODE (op1) == REG || !no_new_pseudos))
8747 tmp = op0, op0 = op1, op1 = tmp;
8748 code = swap_condition (code);
8749 if (GET_CODE (op0) != REG)
8750 op0 = force_reg (op_mode, op0);
8758 /* Convert comparison codes we use to represent FP comparison to integer
8759 code that will result in proper branch. Return UNKNOWN if no such code
8761 static enum rtx_code
8762 ix86_fp_compare_code_to_integer (enum rtx_code code)
8791 /* Split comparison code CODE into comparisons we can do using branch
8792 instructions. BYPASS_CODE is comparison code for branch that will
8793 branch around FIRST_CODE and SECOND_CODE. If some of branches
8794 is not required, set value to NIL.
8795 We never require more than two branches. */
8797 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
8798 enum rtx_code *first_code,
8799 enum rtx_code *second_code)
8805 /* The fcomi comparison sets flags as follows:
8815 case GT: /* GTU - CF=0 & ZF=0 */
8816 case GE: /* GEU - CF=0 */
8817 case ORDERED: /* PF=0 */
8818 case UNORDERED: /* PF=1 */
8819 case UNEQ: /* EQ - ZF=1 */
8820 case UNLT: /* LTU - CF=1 */
8821 case UNLE: /* LEU - CF=1 | ZF=1 */
8822 case LTGT: /* EQ - ZF=0 */
8824 case LT: /* LTU - CF=1 - fails on unordered */
8826 *bypass_code = UNORDERED;
8828 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8830 *bypass_code = UNORDERED;
8832 case EQ: /* EQ - ZF=1 - fails on unordered */
8834 *bypass_code = UNORDERED;
8836 case NE: /* NE - ZF=0 - fails on unordered */
8838 *second_code = UNORDERED;
8840 case UNGE: /* GEU - CF=0 - fails on unordered */
8842 *second_code = UNORDERED;
8844 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8846 *second_code = UNORDERED;
8851 if (!TARGET_IEEE_FP)
8858 /* Return cost of comparison done fcom + arithmetics operations on AX.
8859 All following functions do use number of instructions as a cost metrics.
8860 In future this should be tweaked to compute bytes for optimize_size and
8861 take into account performance of various instructions on various CPUs. */
8863 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
8865 if (!TARGET_IEEE_FP)
8867 /* The cost of code output by ix86_expand_fp_compare. */
8895 /* Return cost of comparison done using fcomi operation.
8896 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8898 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
8900 enum rtx_code bypass_code, first_code, second_code;
8901 /* Return arbitrarily high cost when instruction is not supported - this
8902 prevents gcc from using it. */
8905 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8906 return (bypass_code != NIL || second_code != NIL) + 2;
8909 /* Return cost of comparison done using sahf operation.
8910 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8912 ix86_fp_comparison_sahf_cost (enum rtx_code code)
8914 enum rtx_code bypass_code, first_code, second_code;
8915 /* Return arbitrarily high cost when instruction is not preferred - this
8916 avoids gcc from using it. */
8917 if (!TARGET_USE_SAHF && !optimize_size)
8919 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8920 return (bypass_code != NIL || second_code != NIL) + 3;
8923 /* Compute cost of the comparison done using any method.
8924 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8926 ix86_fp_comparison_cost (enum rtx_code code)
8928 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8931 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8932 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8934 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8935 if (min > sahf_cost)
8937 if (min > fcomi_cost)
8942 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8945 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
8946 rtx *second_test, rtx *bypass_test)
8948 enum machine_mode fpcmp_mode, intcmp_mode;
8950 int cost = ix86_fp_comparison_cost (code);
8951 enum rtx_code bypass_code, first_code, second_code;
8953 fpcmp_mode = ix86_fp_compare_mode (code);
8954 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8957 *second_test = NULL_RTX;
8959 *bypass_test = NULL_RTX;
8961 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8963 /* Do fcomi/sahf based test when profitable. */
8964 if ((bypass_code == NIL || bypass_test)
8965 && (second_code == NIL || second_test)
8966 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8970 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8971 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8977 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8978 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8980 scratch = gen_reg_rtx (HImode);
8981 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8982 emit_insn (gen_x86_sahf_1 (scratch));
8985 /* The FP codes work out to act like unsigned. */
8986 intcmp_mode = fpcmp_mode;
8988 if (bypass_code != NIL)
8989 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8990 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8992 if (second_code != NIL)
8993 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8994 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8999 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
9000 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9001 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9003 scratch = gen_reg_rtx (HImode);
9004 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9006 /* In the unordered case, we have to check C2 for NaN's, which
9007 doesn't happen to work out to anything nice combination-wise.
9008 So do some bit twiddling on the value we've got in AH to come
9009 up with an appropriate set of condition codes. */
9011 intcmp_mode = CCNOmode;
9016 if (code == GT || !TARGET_IEEE_FP)
9018 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9023 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9024 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9025 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
9026 intcmp_mode = CCmode;
9032 if (code == LT && TARGET_IEEE_FP)
9034 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9035 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
9036 intcmp_mode = CCmode;
9041 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
9047 if (code == GE || !TARGET_IEEE_FP)
9049 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
9054 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9055 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9062 if (code == LE && TARGET_IEEE_FP)
9064 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9065 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9066 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9067 intcmp_mode = CCmode;
9072 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9078 if (code == EQ && TARGET_IEEE_FP)
9080 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9081 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9082 intcmp_mode = CCmode;
9087 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9094 if (code == NE && TARGET_IEEE_FP)
9096 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9097 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9103 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9109 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9113 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9122 /* Return the test that should be put into the flags user, i.e.
9123 the bcc, scc, or cmov instruction. */
9124 return gen_rtx_fmt_ee (code, VOIDmode,
9125 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9130 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
9133 op0 = ix86_compare_op0;
9134 op1 = ix86_compare_op1;
9137 *second_test = NULL_RTX;
9139 *bypass_test = NULL_RTX;
9141 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9142 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9143 second_test, bypass_test);
9145 ret = ix86_expand_int_compare (code, op0, op1);
9150 /* Return true if the CODE will result in nontrivial jump sequence. */
9152 ix86_fp_jump_nontrivial_p (enum rtx_code code)
9154 enum rtx_code bypass_code, first_code, second_code;
9157 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9158 return bypass_code != NIL || second_code != NIL;
9162 ix86_expand_branch (enum rtx_code code, rtx label)
9166 switch (GET_MODE (ix86_compare_op0))
9172 tmp = ix86_expand_compare (code, NULL, NULL);
9173 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9174 gen_rtx_LABEL_REF (VOIDmode, label),
9176 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9185 enum rtx_code bypass_code, first_code, second_code;
9187 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9190 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9192 /* Check whether we will use the natural sequence with one jump. If
9193 so, we can expand jump early. Otherwise delay expansion by
9194 creating compound insn to not confuse optimizers. */
9195 if (bypass_code == NIL && second_code == NIL
9198 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9199 gen_rtx_LABEL_REF (VOIDmode, label),
9204 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9205 ix86_compare_op0, ix86_compare_op1);
9206 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9207 gen_rtx_LABEL_REF (VOIDmode, label),
9209 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9211 use_fcomi = ix86_use_fcomi_compare (code);
9212 vec = rtvec_alloc (3 + !use_fcomi);
9213 RTVEC_ELT (vec, 0) = tmp;
9215 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9217 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9220 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9222 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9230 /* Expand DImode branch into multiple compare+branch. */
9232 rtx lo[2], hi[2], label2;
9233 enum rtx_code code1, code2, code3;
9235 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9237 tmp = ix86_compare_op0;
9238 ix86_compare_op0 = ix86_compare_op1;
9239 ix86_compare_op1 = tmp;
9240 code = swap_condition (code);
9242 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9243 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9245 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9246 avoid two branches. This costs one extra insn, so disable when
9247 optimizing for size. */
9249 if ((code == EQ || code == NE)
9251 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9256 if (hi[1] != const0_rtx)
9257 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9258 NULL_RTX, 0, OPTAB_WIDEN);
9261 if (lo[1] != const0_rtx)
9262 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9263 NULL_RTX, 0, OPTAB_WIDEN);
9265 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9266 NULL_RTX, 0, OPTAB_WIDEN);
9268 ix86_compare_op0 = tmp;
9269 ix86_compare_op1 = const0_rtx;
9270 ix86_expand_branch (code, label);
9274 /* Otherwise, if we are doing less-than or greater-or-equal-than,
9275 op1 is a constant and the low word is zero, then we can just
9276 examine the high word. */
9278 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9281 case LT: case LTU: case GE: case GEU:
9282 ix86_compare_op0 = hi[0];
9283 ix86_compare_op1 = hi[1];
9284 ix86_expand_branch (code, label);
9290 /* Otherwise, we need two or three jumps. */
9292 label2 = gen_label_rtx ();
9295 code2 = swap_condition (code);
9296 code3 = unsigned_condition (code);
9300 case LT: case GT: case LTU: case GTU:
9303 case LE: code1 = LT; code2 = GT; break;
9304 case GE: code1 = GT; code2 = LT; break;
9305 case LEU: code1 = LTU; code2 = GTU; break;
9306 case GEU: code1 = GTU; code2 = LTU; break;
9308 case EQ: code1 = NIL; code2 = NE; break;
9309 case NE: code2 = NIL; break;
9317 * if (hi(a) < hi(b)) goto true;
9318 * if (hi(a) > hi(b)) goto false;
9319 * if (lo(a) < lo(b)) goto true;
9323 ix86_compare_op0 = hi[0];
9324 ix86_compare_op1 = hi[1];
9327 ix86_expand_branch (code1, label);
9329 ix86_expand_branch (code2, label2);
9331 ix86_compare_op0 = lo[0];
9332 ix86_compare_op1 = lo[1];
9333 ix86_expand_branch (code3, label);
9336 emit_label (label2);
9345 /* Split branch based on floating point condition. */
9347 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
9348 rtx target1, rtx target2, rtx tmp)
9351 rtx label = NULL_RTX;
9353 int bypass_probability = -1, second_probability = -1, probability = -1;
9356 if (target2 != pc_rtx)
9359 code = reverse_condition_maybe_unordered (code);
9364 condition = ix86_expand_fp_compare (code, op1, op2,
9365 tmp, &second, &bypass);
9367 if (split_branch_probability >= 0)
9369 /* Distribute the probabilities across the jumps.
9370 Assume the BYPASS and SECOND to be always test
9372 probability = split_branch_probability;
9374 /* Value of 1 is low enough to make no need for probability
9375 to be updated. Later we may run some experiments and see
9376 if unordered values are more frequent in practice. */
9378 bypass_probability = 1;
9380 second_probability = 1;
9382 if (bypass != NULL_RTX)
9384 label = gen_label_rtx ();
9385 i = emit_jump_insn (gen_rtx_SET
9387 gen_rtx_IF_THEN_ELSE (VOIDmode,
9389 gen_rtx_LABEL_REF (VOIDmode,
9392 if (bypass_probability >= 0)
9394 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9395 GEN_INT (bypass_probability),
9398 i = emit_jump_insn (gen_rtx_SET
9400 gen_rtx_IF_THEN_ELSE (VOIDmode,
9401 condition, target1, target2)));
9402 if (probability >= 0)
9404 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9405 GEN_INT (probability),
9407 if (second != NULL_RTX)
9409 i = emit_jump_insn (gen_rtx_SET
9411 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9413 if (second_probability >= 0)
9415 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9416 GEN_INT (second_probability),
9419 if (label != NULL_RTX)
9424 ix86_expand_setcc (enum rtx_code code, rtx dest)
9426 rtx ret, tmp, tmpreg, equiv;
9427 rtx second_test, bypass_test;
9429 if (GET_MODE (ix86_compare_op0) == DImode
9431 return 0; /* FAIL */
9433 if (GET_MODE (dest) != QImode)
9436 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9437 PUT_MODE (ret, QImode);
9442 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9443 if (bypass_test || second_test)
9445 rtx test = second_test;
9447 rtx tmp2 = gen_reg_rtx (QImode);
9454 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9456 PUT_MODE (test, QImode);
9457 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9460 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9462 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9465 /* Attach a REG_EQUAL note describing the comparison result. */
9466 equiv = simplify_gen_relational (code, QImode,
9467 GET_MODE (ix86_compare_op0),
9468 ix86_compare_op0, ix86_compare_op1);
9469 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
9471 return 1; /* DONE */
9474 /* Expand comparison setting or clearing carry flag. Return true when
9475 successful and set pop for the operation. */
9477 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
9479 enum machine_mode mode =
9480 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9482 /* Do not handle DImode compares that go trought special path. Also we can't
9483 deal with FP compares yet. This is possible to add. */
9484 if ((mode == DImode && !TARGET_64BIT))
9486 if (FLOAT_MODE_P (mode))
9488 rtx second_test = NULL, bypass_test = NULL;
9489 rtx compare_op, compare_seq;
9491 /* Shortcut: following common codes never translate into carry flag compares. */
9492 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9493 || code == ORDERED || code == UNORDERED)
9496 /* These comparisons require zero flag; swap operands so they won't. */
9497 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9503 code = swap_condition (code);
9506 /* Try to expand the comparison and verify that we end up with carry flag
9507 based comparison. This is fails to be true only when we decide to expand
9508 comparison using arithmetic that is not too common scenario. */
9510 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9511 &second_test, &bypass_test);
9512 compare_seq = get_insns ();
9515 if (second_test || bypass_test)
9517 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9518 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9519 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9521 code = GET_CODE (compare_op);
9522 if (code != LTU && code != GEU)
9524 emit_insn (compare_seq);
9528 if (!INTEGRAL_MODE_P (mode))
9536 /* Convert a==0 into (unsigned)a<1. */
9539 if (op1 != const0_rtx)
9542 code = (code == EQ ? LTU : GEU);
9545 /* Convert a>b into b<a or a>=b-1. */
9548 if (GET_CODE (op1) == CONST_INT)
9550 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9551 /* Bail out on overflow. We still can swap operands but that
9552 would force loading of the constant into register. */
9553 if (op1 == const0_rtx
9554 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9556 code = (code == GTU ? GEU : LTU);
9563 code = (code == GTU ? LTU : GEU);
9567 /* Convert a>=0 into (unsigned)a<0x80000000. */
9570 if (mode == DImode || op1 != const0_rtx)
9572 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9573 code = (code == LT ? GEU : LTU);
9577 if (mode == DImode || op1 != constm1_rtx)
9579 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9580 code = (code == LE ? GEU : LTU);
9586 /* Swapping operands may cause constant to appear as first operand. */
9587 if (!nonimmediate_operand (op0, VOIDmode))
9591 op0 = force_reg (mode, op0);
9593 ix86_compare_op0 = op0;
9594 ix86_compare_op1 = op1;
9595 *pop = ix86_expand_compare (code, NULL, NULL);
9596 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9602 ix86_expand_int_movcc (rtx operands[])
9604 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9605 rtx compare_seq, compare_op;
9606 rtx second_test, bypass_test;
9607 enum machine_mode mode = GET_MODE (operands[0]);
9608 bool sign_bit_compare_p = false;;
9611 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9612 compare_seq = get_insns ();
9615 compare_code = GET_CODE (compare_op);
9617 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9618 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9619 sign_bit_compare_p = true;
9621 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9622 HImode insns, we'd be swallowed in word prefix ops. */
9624 if ((mode != HImode || TARGET_FAST_PREFIX)
9625 && (mode != DImode || TARGET_64BIT)
9626 && GET_CODE (operands[2]) == CONST_INT
9627 && GET_CODE (operands[3]) == CONST_INT)
9629 rtx out = operands[0];
9630 HOST_WIDE_INT ct = INTVAL (operands[2]);
9631 HOST_WIDE_INT cf = INTVAL (operands[3]);
9635 /* Sign bit compares are better done using shifts than we do by using
9637 if (sign_bit_compare_p
9638 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9639 ix86_compare_op1, &compare_op))
9641 /* Detect overlap between destination and compare sources. */
9644 if (!sign_bit_compare_p)
9648 compare_code = GET_CODE (compare_op);
9650 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9651 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9654 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9657 /* To simplify rest of code, restrict to the GEU case. */
9658 if (compare_code == LTU)
9660 HOST_WIDE_INT tmp = ct;
9663 compare_code = reverse_condition (compare_code);
9664 code = reverse_condition (code);
9669 PUT_CODE (compare_op,
9670 reverse_condition_maybe_unordered
9671 (GET_CODE (compare_op)));
9673 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9677 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9678 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9679 tmp = gen_reg_rtx (mode);
9682 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9684 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9688 if (code == GT || code == GE)
9689 code = reverse_condition (code);
9692 HOST_WIDE_INT tmp = ct;
9697 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9698 ix86_compare_op1, VOIDmode, 0, -1);
9711 tmp = expand_simple_binop (mode, PLUS,
9713 copy_rtx (tmp), 1, OPTAB_DIRECT);
9724 tmp = expand_simple_binop (mode, IOR,
9726 copy_rtx (tmp), 1, OPTAB_DIRECT);
9728 else if (diff == -1 && ct)
9738 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9740 tmp = expand_simple_binop (mode, PLUS,
9741 copy_rtx (tmp), GEN_INT (cf),
9742 copy_rtx (tmp), 1, OPTAB_DIRECT);
9750 * andl cf - ct, dest
9760 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9763 tmp = expand_simple_binop (mode, AND,
9765 gen_int_mode (cf - ct, mode),
9766 copy_rtx (tmp), 1, OPTAB_DIRECT);
9768 tmp = expand_simple_binop (mode, PLUS,
9769 copy_rtx (tmp), GEN_INT (ct),
9770 copy_rtx (tmp), 1, OPTAB_DIRECT);
9773 if (!rtx_equal_p (tmp, out))
9774 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9776 return 1; /* DONE */
9782 tmp = ct, ct = cf, cf = tmp;
9784 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9786 /* We may be reversing unordered compare to normal compare, that
9787 is not valid in general (we may convert non-trapping condition
9788 to trapping one), however on i386 we currently emit all
9789 comparisons unordered. */
9790 compare_code = reverse_condition_maybe_unordered (compare_code);
9791 code = reverse_condition_maybe_unordered (code);
9795 compare_code = reverse_condition (compare_code);
9796 code = reverse_condition (code);
9801 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9802 && GET_CODE (ix86_compare_op1) == CONST_INT)
9804 if (ix86_compare_op1 == const0_rtx
9805 && (code == LT || code == GE))
9806 compare_code = code;
9807 else if (ix86_compare_op1 == constm1_rtx)
9811 else if (code == GT)
9816 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9817 if (compare_code != NIL
9818 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9819 && (cf == -1 || ct == -1))
9821 /* If lea code below could be used, only optimize
9822 if it results in a 2 insn sequence. */
9824 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9825 || diff == 3 || diff == 5 || diff == 9)
9826 || (compare_code == LT && ct == -1)
9827 || (compare_code == GE && cf == -1))
9830 * notl op1 (if necessary)
9838 code = reverse_condition (code);
9841 out = emit_store_flag (out, code, ix86_compare_op0,
9842 ix86_compare_op1, VOIDmode, 0, -1);
9844 out = expand_simple_binop (mode, IOR,
9846 out, 1, OPTAB_DIRECT);
9847 if (out != operands[0])
9848 emit_move_insn (operands[0], out);
9850 return 1; /* DONE */
9855 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9856 || diff == 3 || diff == 5 || diff == 9)
9857 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9858 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
9864 * lea cf(dest*(ct-cf)),dest
9868 * This also catches the degenerate setcc-only case.
9874 out = emit_store_flag (out, code, ix86_compare_op0,
9875 ix86_compare_op1, VOIDmode, 0, 1);
9878 /* On x86_64 the lea instruction operates on Pmode, so we need
9879 to get arithmetics done in proper mode to match. */
9881 tmp = copy_rtx (out);
9885 out1 = copy_rtx (out);
9886 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9890 tmp = gen_rtx_PLUS (mode, tmp, out1);
9896 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9899 if (!rtx_equal_p (tmp, out))
9902 out = force_operand (tmp, copy_rtx (out));
9904 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
9906 if (!rtx_equal_p (out, operands[0]))
9907 emit_move_insn (operands[0], copy_rtx (out));
9909 return 1; /* DONE */
9913 * General case: Jumpful:
9914 * xorl dest,dest cmpl op1, op2
9915 * cmpl op1, op2 movl ct, dest
9917 * decl dest movl cf, dest
9918 * andl (cf-ct),dest 1:
9923 * This is reasonably steep, but branch mispredict costs are
9924 * high on modern cpus, so consider failing only if optimizing
9928 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9929 && BRANCH_COST >= 2)
9935 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9936 /* We may be reversing unordered compare to normal compare,
9937 that is not valid in general (we may convert non-trapping
9938 condition to trapping one), however on i386 we currently
9939 emit all comparisons unordered. */
9940 code = reverse_condition_maybe_unordered (code);
9943 code = reverse_condition (code);
9944 if (compare_code != NIL)
9945 compare_code = reverse_condition (compare_code);
9949 if (compare_code != NIL)
9951 /* notl op1 (if needed)
9956 For x < 0 (resp. x <= -1) there will be no notl,
9957 so if possible swap the constants to get rid of the
9959 True/false will be -1/0 while code below (store flag
9960 followed by decrement) is 0/-1, so the constants need
9961 to be exchanged once more. */
9963 if (compare_code == GE || !cf)
9965 code = reverse_condition (code);
9970 HOST_WIDE_INT tmp = cf;
9975 out = emit_store_flag (out, code, ix86_compare_op0,
9976 ix86_compare_op1, VOIDmode, 0, -1);
9980 out = emit_store_flag (out, code, ix86_compare_op0,
9981 ix86_compare_op1, VOIDmode, 0, 1);
9983 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
9984 copy_rtx (out), 1, OPTAB_DIRECT);
9987 out = expand_simple_binop (mode, AND, copy_rtx (out),
9988 gen_int_mode (cf - ct, mode),
9989 copy_rtx (out), 1, OPTAB_DIRECT);
9991 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
9992 copy_rtx (out), 1, OPTAB_DIRECT);
9993 if (!rtx_equal_p (out, operands[0]))
9994 emit_move_insn (operands[0], copy_rtx (out));
9996 return 1; /* DONE */
10000 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10002 /* Try a few things more with specific constants and a variable. */
10005 rtx var, orig_out, out, tmp;
10007 if (BRANCH_COST <= 2)
10008 return 0; /* FAIL */
10010 /* If one of the two operands is an interesting constant, load a
10011 constant with the above and mask it in with a logical operation. */
10013 if (GET_CODE (operands[2]) == CONST_INT)
10016 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
10017 operands[3] = constm1_rtx, op = and_optab;
10018 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
10019 operands[3] = const0_rtx, op = ior_optab;
10021 return 0; /* FAIL */
10023 else if (GET_CODE (operands[3]) == CONST_INT)
10026 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
10027 operands[2] = constm1_rtx, op = and_optab;
10028 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
10029 operands[2] = const0_rtx, op = ior_optab;
10031 return 0; /* FAIL */
10034 return 0; /* FAIL */
10036 orig_out = operands[0];
10037 tmp = gen_reg_rtx (mode);
10040 /* Recurse to get the constant loaded. */
10041 if (ix86_expand_int_movcc (operands) == 0)
10042 return 0; /* FAIL */
10044 /* Mask in the interesting variable. */
10045 out = expand_binop (mode, op, var, tmp, orig_out, 0,
10047 if (!rtx_equal_p (out, orig_out))
10048 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
10050 return 1; /* DONE */
10054 * For comparison with above,
10064 if (! nonimmediate_operand (operands[2], mode))
10065 operands[2] = force_reg (mode, operands[2]);
10066 if (! nonimmediate_operand (operands[3], mode))
10067 operands[3] = force_reg (mode, operands[3]);
10069 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10071 rtx tmp = gen_reg_rtx (mode);
10072 emit_move_insn (tmp, operands[3]);
10075 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10077 rtx tmp = gen_reg_rtx (mode);
10078 emit_move_insn (tmp, operands[2]);
10082 if (! register_operand (operands[2], VOIDmode)
10084 || ! register_operand (operands[3], VOIDmode)))
10085 operands[2] = force_reg (mode, operands[2]);
10088 && ! register_operand (operands[3], VOIDmode))
10089 operands[3] = force_reg (mode, operands[3]);
10091 emit_insn (compare_seq);
10092 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10093 gen_rtx_IF_THEN_ELSE (mode,
10094 compare_op, operands[2],
10097 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10098 gen_rtx_IF_THEN_ELSE (mode,
10100 copy_rtx (operands[3]),
10101 copy_rtx (operands[0]))));
10103 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10104 gen_rtx_IF_THEN_ELSE (mode,
10106 copy_rtx (operands[2]),
10107 copy_rtx (operands[0]))));
10109 return 1; /* DONE */
10113 ix86_expand_fp_movcc (rtx operands[])
10115 enum rtx_code code;
10117 rtx compare_op, second_test, bypass_test;
10119 /* For SF/DFmode conditional moves based on comparisons
10120 in same mode, we may want to use SSE min/max instructions. */
10121 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
10122 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
10123 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
10124 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
10125 && (!TARGET_IEEE_FP
10126 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
10127 /* We may be called from the post-reload splitter. */
10128 && (!REG_P (operands[0])
10129 || SSE_REG_P (operands[0])
10130 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
10132 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
10133 code = GET_CODE (operands[1]);
10135 /* See if we have (cross) match between comparison operands and
10136 conditional move operands. */
10137 if (rtx_equal_p (operands[2], op1))
10142 code = reverse_condition_maybe_unordered (code);
10144 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
10146 /* Check for min operation. */
10147 if (code == LT || code == UNLE)
10155 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10156 if (memory_operand (op0, VOIDmode))
10157 op0 = force_reg (GET_MODE (operands[0]), op0);
10158 if (GET_MODE (operands[0]) == SFmode)
10159 emit_insn (gen_minsf3 (operands[0], op0, op1));
10161 emit_insn (gen_mindf3 (operands[0], op0, op1));
10164 /* Check for max operation. */
10165 if (code == GT || code == UNGE)
10173 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10174 if (memory_operand (op0, VOIDmode))
10175 op0 = force_reg (GET_MODE (operands[0]), op0);
10176 if (GET_MODE (operands[0]) == SFmode)
10177 emit_insn (gen_maxsf3 (operands[0], op0, op1));
10179 emit_insn (gen_maxdf3 (operands[0], op0, op1));
10183 /* Manage condition to be sse_comparison_operator. In case we are
10184 in non-ieee mode, try to canonicalize the destination operand
10185 to be first in the comparison - this helps reload to avoid extra
10187 if (!sse_comparison_operator (operands[1], VOIDmode)
10188 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
10190 rtx tmp = ix86_compare_op0;
10191 ix86_compare_op0 = ix86_compare_op1;
10192 ix86_compare_op1 = tmp;
10193 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
10194 VOIDmode, ix86_compare_op0,
10197 /* Similarly try to manage result to be first operand of conditional
10198 move. We also don't support the NE comparison on SSE, so try to
10200 if ((rtx_equal_p (operands[0], operands[3])
10201 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
10202 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
10204 rtx tmp = operands[2];
10205 operands[2] = operands[3];
10207 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10208 (GET_CODE (operands[1])),
10209 VOIDmode, ix86_compare_op0,
10212 if (GET_MODE (operands[0]) == SFmode)
10213 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
10214 operands[2], operands[3],
10215 ix86_compare_op0, ix86_compare_op1));
10217 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
10218 operands[2], operands[3],
10219 ix86_compare_op0, ix86_compare_op1));
10223 /* The floating point conditional move instructions don't directly
10224 support conditions resulting from a signed integer comparison. */
10226 code = GET_CODE (operands[1]);
10227 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10229 /* The floating point conditional move instructions don't directly
10230 support signed integer comparisons. */
10232 if (!fcmov_comparison_operator (compare_op, VOIDmode))
10234 if (second_test != NULL || bypass_test != NULL)
10236 tmp = gen_reg_rtx (QImode);
10237 ix86_expand_setcc (code, tmp);
10239 ix86_compare_op0 = tmp;
10240 ix86_compare_op1 = const0_rtx;
10241 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10243 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10245 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10246 emit_move_insn (tmp, operands[3]);
10249 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10251 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10252 emit_move_insn (tmp, operands[2]);
10256 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10257 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10262 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10263 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10268 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10269 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10277 /* Expand conditional increment or decrement using adb/sbb instructions.
10278 The default case using setcc followed by the conditional move can be
10279 done by generic code. */
10281 ix86_expand_int_addcc (rtx operands[])
10283 enum rtx_code code = GET_CODE (operands[1]);
10285 rtx val = const0_rtx;
10286 bool fpcmp = false;
10287 enum machine_mode mode = GET_MODE (operands[0]);
10289 if (operands[3] != const1_rtx
10290 && operands[3] != constm1_rtx)
10292 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10293 ix86_compare_op1, &compare_op))
10295 code = GET_CODE (compare_op);
10297 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10298 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10301 code = ix86_fp_compare_code_to_integer (code);
10308 PUT_CODE (compare_op,
10309 reverse_condition_maybe_unordered
10310 (GET_CODE (compare_op)));
10312 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10314 PUT_MODE (compare_op, mode);
10316 /* Construct either adc or sbb insn. */
10317 if ((code == LTU) == (operands[3] == constm1_rtx))
10319 switch (GET_MODE (operands[0]))
10322 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10325 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10328 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10331 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10339 switch (GET_MODE (operands[0]))
10342 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10345 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10348 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10351 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10357 return 1; /* DONE */
10361 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10362 works for floating pointer parameters and nonoffsetable memories.
10363 For pushes, it returns just stack offsets; the values will be saved
10364 in the right order. Maximally three parts are generated. */
10367 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
10372 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
10374 size = (GET_MODE_SIZE (mode) + 4) / 8;
10376 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10378 if (size < 2 || size > 3)
10381 /* Optimize constant pool reference to immediates. This is used by fp
10382 moves, that force all constants to memory to allow combining. */
10383 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10385 rtx tmp = maybe_get_pool_constant (operand);
10390 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10392 /* The only non-offsetable memories we handle are pushes. */
10393 if (! push_operand (operand, VOIDmode))
10396 operand = copy_rtx (operand);
10397 PUT_MODE (operand, Pmode);
10398 parts[0] = parts[1] = parts[2] = operand;
10400 else if (!TARGET_64BIT)
10402 if (mode == DImode)
10403 split_di (&operand, 1, &parts[0], &parts[1]);
10406 if (REG_P (operand))
10408 if (!reload_completed)
10410 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10411 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10413 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10415 else if (offsettable_memref_p (operand))
10417 operand = adjust_address (operand, SImode, 0);
10418 parts[0] = operand;
10419 parts[1] = adjust_address (operand, SImode, 4);
10421 parts[2] = adjust_address (operand, SImode, 8);
10423 else if (GET_CODE (operand) == CONST_DOUBLE)
10428 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10432 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10433 parts[2] = gen_int_mode (l[2], SImode);
10436 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10441 parts[1] = gen_int_mode (l[1], SImode);
10442 parts[0] = gen_int_mode (l[0], SImode);
10450 if (mode == TImode)
10451 split_ti (&operand, 1, &parts[0], &parts[1]);
10452 if (mode == XFmode || mode == TFmode)
10454 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
10455 if (REG_P (operand))
10457 if (!reload_completed)
10459 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10460 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
10462 else if (offsettable_memref_p (operand))
10464 operand = adjust_address (operand, DImode, 0);
10465 parts[0] = operand;
10466 parts[1] = adjust_address (operand, upper_mode, 8);
10468 else if (GET_CODE (operand) == CONST_DOUBLE)
10473 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10474 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10475 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10476 if (HOST_BITS_PER_WIDE_INT >= 64)
10479 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10480 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10483 parts[0] = immed_double_const (l[0], l[1], DImode);
10484 if (upper_mode == SImode)
10485 parts[1] = gen_int_mode (l[2], SImode);
10486 else if (HOST_BITS_PER_WIDE_INT >= 64)
10489 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
10490 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
10493 parts[1] = immed_double_const (l[2], l[3], DImode);
10503 /* Emit insns to perform a move or push of DI, DF, and XF values.
10504 Return false when normal moves are needed; true when all required
10505 insns have been emitted. Operands 2-4 contain the input values
10506 int the correct order; operands 5-7 contain the output values. */
10509 ix86_split_long_move (rtx operands[])
10514 int collisions = 0;
10515 enum machine_mode mode = GET_MODE (operands[0]);
10517 /* The DFmode expanders may ask us to move double.
10518 For 64bit target this is single move. By hiding the fact
10519 here we simplify i386.md splitters. */
10520 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10522 /* Optimize constant pool reference to immediates. This is used by
10523 fp moves, that force all constants to memory to allow combining. */
10525 if (GET_CODE (operands[1]) == MEM
10526 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10527 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10528 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10529 if (push_operand (operands[0], VOIDmode))
10531 operands[0] = copy_rtx (operands[0]);
10532 PUT_MODE (operands[0], Pmode);
10535 operands[0] = gen_lowpart (DImode, operands[0]);
10536 operands[1] = gen_lowpart (DImode, operands[1]);
10537 emit_move_insn (operands[0], operands[1]);
10541 /* The only non-offsettable memory we handle is push. */
10542 if (push_operand (operands[0], VOIDmode))
10544 else if (GET_CODE (operands[0]) == MEM
10545 && ! offsettable_memref_p (operands[0]))
10548 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10549 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10551 /* When emitting push, take care for source operands on the stack. */
10552 if (push && GET_CODE (operands[1]) == MEM
10553 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10556 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10557 XEXP (part[1][2], 0));
10558 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10559 XEXP (part[1][1], 0));
10562 /* We need to do copy in the right order in case an address register
10563 of the source overlaps the destination. */
10564 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10566 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10568 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10571 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10574 /* Collision in the middle part can be handled by reordering. */
10575 if (collisions == 1 && nparts == 3
10576 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10579 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10580 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10583 /* If there are more collisions, we can't handle it by reordering.
10584 Do an lea to the last part and use only one colliding move. */
10585 else if (collisions > 1)
10591 base = part[0][nparts - 1];
10593 /* Handle the case when the last part isn't valid for lea.
10594 Happens in 64-bit mode storing the 12-byte XFmode. */
10595 if (GET_MODE (base) != Pmode)
10596 base = gen_rtx_REG (Pmode, REGNO (base));
10598 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10599 part[1][0] = replace_equiv_address (part[1][0], base);
10600 part[1][1] = replace_equiv_address (part[1][1],
10601 plus_constant (base, UNITS_PER_WORD));
10603 part[1][2] = replace_equiv_address (part[1][2],
10604 plus_constant (base, 8));
10614 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
10615 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
10616 emit_move_insn (part[0][2], part[1][2]);
10621 /* In 64bit mode we don't have 32bit push available. In case this is
10622 register, it is OK - we will just use larger counterpart. We also
10623 retype memory - these comes from attempt to avoid REX prefix on
10624 moving of second half of TFmode value. */
10625 if (GET_MODE (part[1][1]) == SImode)
10627 if (GET_CODE (part[1][1]) == MEM)
10628 part[1][1] = adjust_address (part[1][1], DImode, 0);
10629 else if (REG_P (part[1][1]))
10630 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10633 if (GET_MODE (part[1][0]) == SImode)
10634 part[1][0] = part[1][1];
10637 emit_move_insn (part[0][1], part[1][1]);
10638 emit_move_insn (part[0][0], part[1][0]);
10642 /* Choose correct order to not overwrite the source before it is copied. */
10643 if ((REG_P (part[0][0])
10644 && REG_P (part[1][1])
10645 && (REGNO (part[0][0]) == REGNO (part[1][1])
10647 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10649 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10653 operands[2] = part[0][2];
10654 operands[3] = part[0][1];
10655 operands[4] = part[0][0];
10656 operands[5] = part[1][2];
10657 operands[6] = part[1][1];
10658 operands[7] = part[1][0];
10662 operands[2] = part[0][1];
10663 operands[3] = part[0][0];
10664 operands[5] = part[1][1];
10665 operands[6] = part[1][0];
10672 operands[2] = part[0][0];
10673 operands[3] = part[0][1];
10674 operands[4] = part[0][2];
10675 operands[5] = part[1][0];
10676 operands[6] = part[1][1];
10677 operands[7] = part[1][2];
10681 operands[2] = part[0][0];
10682 operands[3] = part[0][1];
10683 operands[5] = part[1][0];
10684 operands[6] = part[1][1];
10687 emit_move_insn (operands[2], operands[5]);
10688 emit_move_insn (operands[3], operands[6]);
10690 emit_move_insn (operands[4], operands[7]);
10696 ix86_split_ashldi (rtx *operands, rtx scratch)
10698 rtx low[2], high[2];
10701 if (GET_CODE (operands[2]) == CONST_INT)
10703 split_di (operands, 2, low, high);
10704 count = INTVAL (operands[2]) & 63;
10708 emit_move_insn (high[0], low[1]);
10709 emit_move_insn (low[0], const0_rtx);
10712 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10716 if (!rtx_equal_p (operands[0], operands[1]))
10717 emit_move_insn (operands[0], operands[1]);
10718 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10719 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10724 if (!rtx_equal_p (operands[0], operands[1]))
10725 emit_move_insn (operands[0], operands[1]);
10727 split_di (operands, 1, low, high);
10729 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10730 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10732 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10734 if (! no_new_pseudos)
10735 scratch = force_reg (SImode, const0_rtx);
10737 emit_move_insn (scratch, const0_rtx);
10739 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10743 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10748 ix86_split_ashrdi (rtx *operands, rtx scratch)
10750 rtx low[2], high[2];
10753 if (GET_CODE (operands[2]) == CONST_INT)
10755 split_di (operands, 2, low, high);
10756 count = INTVAL (operands[2]) & 63;
10760 emit_move_insn (low[0], high[1]);
10762 if (! reload_completed)
10763 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10766 emit_move_insn (high[0], low[0]);
10767 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10771 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10775 if (!rtx_equal_p (operands[0], operands[1]))
10776 emit_move_insn (operands[0], operands[1]);
10777 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10778 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10783 if (!rtx_equal_p (operands[0], operands[1]))
10784 emit_move_insn (operands[0], operands[1]);
10786 split_di (operands, 1, low, high);
10788 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10789 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10791 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10793 if (! no_new_pseudos)
10794 scratch = gen_reg_rtx (SImode);
10795 emit_move_insn (scratch, high[0]);
10796 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10797 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10801 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10806 ix86_split_lshrdi (rtx *operands, rtx scratch)
10808 rtx low[2], high[2];
10811 if (GET_CODE (operands[2]) == CONST_INT)
10813 split_di (operands, 2, low, high);
10814 count = INTVAL (operands[2]) & 63;
10818 emit_move_insn (low[0], high[1]);
10819 emit_move_insn (high[0], const0_rtx);
10822 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10826 if (!rtx_equal_p (operands[0], operands[1]))
10827 emit_move_insn (operands[0], operands[1]);
10828 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10829 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10834 if (!rtx_equal_p (operands[0], operands[1]))
10835 emit_move_insn (operands[0], operands[1]);
10837 split_di (operands, 1, low, high);
10839 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10840 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10842 /* Heh. By reversing the arguments, we can reuse this pattern. */
10843 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10845 if (! no_new_pseudos)
10846 scratch = force_reg (SImode, const0_rtx);
10848 emit_move_insn (scratch, const0_rtx);
10850 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10854 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10858 /* Helper function for the string operations below. Dest VARIABLE whether
10859 it is aligned to VALUE bytes. If true, jump to the label. */
10861 ix86_expand_aligntest (rtx variable, int value)
10863 rtx label = gen_label_rtx ();
10864 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10865 if (GET_MODE (variable) == DImode)
10866 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10868 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10869 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10874 /* Adjust COUNTER by the VALUE. */
10876 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
10878 if (GET_MODE (countreg) == DImode)
10879 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10881 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10884 /* Zero extend possibly SImode EXP to Pmode register. */
10886 ix86_zero_extend_to_Pmode (rtx exp)
10889 if (GET_MODE (exp) == VOIDmode)
10890 return force_reg (Pmode, exp);
10891 if (GET_MODE (exp) == Pmode)
10892 return copy_to_mode_reg (Pmode, exp);
10893 r = gen_reg_rtx (Pmode);
10894 emit_insn (gen_zero_extendsidi2 (r, exp));
10898 /* Expand string move (memcpy) operation. Use i386 string operations when
10899 profitable. expand_clrstr contains similar code. */
10901 ix86_expand_movstr (rtx dst, rtx src, rtx count_exp, rtx align_exp)
10903 rtx srcreg, destreg, countreg;
10904 enum machine_mode counter_mode;
10905 HOST_WIDE_INT align = 0;
10906 unsigned HOST_WIDE_INT count = 0;
10909 if (GET_CODE (align_exp) == CONST_INT)
10910 align = INTVAL (align_exp);
10912 /* Can't use any of this if the user has appropriated esi or edi. */
10913 if (global_regs[4] || global_regs[5])
10916 /* This simple hack avoids all inlining code and simplifies code below. */
10917 if (!TARGET_ALIGN_STRINGOPS)
10920 if (GET_CODE (count_exp) == CONST_INT)
10922 count = INTVAL (count_exp);
10923 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
10927 /* Figure out proper mode for counter. For 32bits it is always SImode,
10928 for 64bits use SImode when possible, otherwise DImode.
10929 Set count to number of bytes copied when known at compile time. */
10930 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10931 || x86_64_zero_extended_value (count_exp))
10932 counter_mode = SImode;
10934 counter_mode = DImode;
10938 if (counter_mode != SImode && counter_mode != DImode)
10941 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10942 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10944 emit_insn (gen_cld ());
10946 /* When optimizing for size emit simple rep ; movsb instruction for
10947 counts not divisible by 4. */
10949 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10951 countreg = ix86_zero_extend_to_Pmode (count_exp);
10953 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
10954 destreg, srcreg, countreg));
10956 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
10957 destreg, srcreg, countreg));
10960 /* For constant aligned (or small unaligned) copies use rep movsl
10961 followed by code copying the rest. For PentiumPro ensure 8 byte
10962 alignment to allow rep movsl acceleration. */
10964 else if (count != 0
10966 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10967 || optimize_size || count < (unsigned int) 64))
10969 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10970 if (count & ~(size - 1))
10972 countreg = copy_to_mode_reg (counter_mode,
10973 GEN_INT ((count >> (size == 4 ? 2 : 3))
10974 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10975 countreg = ix86_zero_extend_to_Pmode (countreg);
10979 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
10980 destreg, srcreg, countreg));
10982 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
10983 destreg, srcreg, countreg));
10986 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
10987 destreg, srcreg, countreg));
10989 if (size == 8 && (count & 0x04))
10990 emit_insn (gen_strmovsi (destreg, srcreg));
10992 emit_insn (gen_strmovhi (destreg, srcreg));
10994 emit_insn (gen_strmovqi (destreg, srcreg));
10996 /* The generic code based on the glibc implementation:
10997 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10998 allowing accelerated copying there)
10999 - copy the data using rep movsl
11000 - copy the rest. */
11005 int desired_alignment = (TARGET_PENTIUMPRO
11006 && (count == 0 || count >= (unsigned int) 260)
11007 ? 8 : UNITS_PER_WORD);
11009 /* In case we don't know anything about the alignment, default to
11010 library version, since it is usually equally fast and result in
11013 Also emit call when we know that the count is large and call overhead
11014 will not be important. */
11015 if (!TARGET_INLINE_ALL_STRINGOPS
11016 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11022 if (TARGET_SINGLE_STRINGOP)
11023 emit_insn (gen_cld ());
11025 countreg2 = gen_reg_rtx (Pmode);
11026 countreg = copy_to_mode_reg (counter_mode, count_exp);
11028 /* We don't use loops to align destination and to copy parts smaller
11029 than 4 bytes, because gcc is able to optimize such code better (in
11030 the case the destination or the count really is aligned, gcc is often
11031 able to predict the branches) and also it is friendlier to the
11032 hardware branch prediction.
11034 Using loops is beneficial for generic case, because we can
11035 handle small counts using the loops. Many CPUs (such as Athlon)
11036 have large REP prefix setup costs.
11038 This is quite costly. Maybe we can revisit this decision later or
11039 add some customizability to this code. */
11041 if (count == 0 && align < desired_alignment)
11043 label = gen_label_rtx ();
11044 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11045 LEU, 0, counter_mode, 1, label);
11049 rtx label = ix86_expand_aligntest (destreg, 1);
11050 emit_insn (gen_strmovqi (destreg, srcreg));
11051 ix86_adjust_counter (countreg, 1);
11052 emit_label (label);
11053 LABEL_NUSES (label) = 1;
11057 rtx label = ix86_expand_aligntest (destreg, 2);
11058 emit_insn (gen_strmovhi (destreg, srcreg));
11059 ix86_adjust_counter (countreg, 2);
11060 emit_label (label);
11061 LABEL_NUSES (label) = 1;
11063 if (align <= 4 && desired_alignment > 4)
11065 rtx label = ix86_expand_aligntest (destreg, 4);
11066 emit_insn (gen_strmovsi (destreg, srcreg));
11067 ix86_adjust_counter (countreg, 4);
11068 emit_label (label);
11069 LABEL_NUSES (label) = 1;
11072 if (label && desired_alignment > 4 && !TARGET_64BIT)
11074 emit_label (label);
11075 LABEL_NUSES (label) = 1;
11078 if (!TARGET_SINGLE_STRINGOP)
11079 emit_insn (gen_cld ());
11082 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11084 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
11085 destreg, srcreg, countreg2));
11089 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
11090 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
11091 destreg, srcreg, countreg2));
11096 emit_label (label);
11097 LABEL_NUSES (label) = 1;
11099 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11100 emit_insn (gen_strmovsi (destreg, srcreg));
11101 if ((align <= 4 || count == 0) && TARGET_64BIT)
11103 rtx label = ix86_expand_aligntest (countreg, 4);
11104 emit_insn (gen_strmovsi (destreg, srcreg));
11105 emit_label (label);
11106 LABEL_NUSES (label) = 1;
11108 if (align > 2 && count != 0 && (count & 2))
11109 emit_insn (gen_strmovhi (destreg, srcreg));
11110 if (align <= 2 || count == 0)
11112 rtx label = ix86_expand_aligntest (countreg, 2);
11113 emit_insn (gen_strmovhi (destreg, srcreg));
11114 emit_label (label);
11115 LABEL_NUSES (label) = 1;
11117 if (align > 1 && count != 0 && (count & 1))
11118 emit_insn (gen_strmovqi (destreg, srcreg));
11119 if (align <= 1 || count == 0)
11121 rtx label = ix86_expand_aligntest (countreg, 1);
11122 emit_insn (gen_strmovqi (destreg, srcreg));
11123 emit_label (label);
11124 LABEL_NUSES (label) = 1;
11128 insns = get_insns ();
11131 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
11136 /* Expand string clear operation (bzero). Use i386 string operations when
11137 profitable. expand_movstr contains similar code. */
11139 ix86_expand_clrstr (rtx src, rtx count_exp, rtx align_exp)
11141 rtx destreg, zeroreg, countreg;
11142 enum machine_mode counter_mode;
11143 HOST_WIDE_INT align = 0;
11144 unsigned HOST_WIDE_INT count = 0;
11146 if (GET_CODE (align_exp) == CONST_INT)
11147 align = INTVAL (align_exp);
11149 /* Can't use any of this if the user has appropriated esi. */
11150 if (global_regs[4])
11153 /* This simple hack avoids all inlining code and simplifies code below. */
11154 if (!TARGET_ALIGN_STRINGOPS)
11157 if (GET_CODE (count_exp) == CONST_INT)
11159 count = INTVAL (count_exp);
11160 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11163 /* Figure out proper mode for counter. For 32bits it is always SImode,
11164 for 64bits use SImode when possible, otherwise DImode.
11165 Set count to number of bytes copied when known at compile time. */
11166 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11167 || x86_64_zero_extended_value (count_exp))
11168 counter_mode = SImode;
11170 counter_mode = DImode;
11172 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11174 emit_insn (gen_cld ());
11176 /* When optimizing for size emit simple rep ; movsb instruction for
11177 counts not divisible by 4. */
11179 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11181 countreg = ix86_zero_extend_to_Pmode (count_exp);
11182 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11184 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
11185 destreg, countreg));
11187 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
11188 destreg, countreg));
11190 else if (count != 0
11192 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11193 || optimize_size || count < (unsigned int) 64))
11195 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11196 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11197 if (count & ~(size - 1))
11199 countreg = copy_to_mode_reg (counter_mode,
11200 GEN_INT ((count >> (size == 4 ? 2 : 3))
11201 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11202 countreg = ix86_zero_extend_to_Pmode (countreg);
11206 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
11207 destreg, countreg));
11209 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
11210 destreg, countreg));
11213 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
11214 destreg, countreg));
11216 if (size == 8 && (count & 0x04))
11217 emit_insn (gen_strsetsi (destreg,
11218 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11220 emit_insn (gen_strsethi (destreg,
11221 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11223 emit_insn (gen_strsetqi (destreg,
11224 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11230 /* Compute desired alignment of the string operation. */
11231 int desired_alignment = (TARGET_PENTIUMPRO
11232 && (count == 0 || count >= (unsigned int) 260)
11233 ? 8 : UNITS_PER_WORD);
11235 /* In case we don't know anything about the alignment, default to
11236 library version, since it is usually equally fast and result in
11239 Also emit call when we know that the count is large and call overhead
11240 will not be important. */
11241 if (!TARGET_INLINE_ALL_STRINGOPS
11242 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11245 if (TARGET_SINGLE_STRINGOP)
11246 emit_insn (gen_cld ());
11248 countreg2 = gen_reg_rtx (Pmode);
11249 countreg = copy_to_mode_reg (counter_mode, count_exp);
11250 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11252 if (count == 0 && align < desired_alignment)
11254 label = gen_label_rtx ();
11255 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11256 LEU, 0, counter_mode, 1, label);
11260 rtx label = ix86_expand_aligntest (destreg, 1);
11261 emit_insn (gen_strsetqi (destreg,
11262 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11263 ix86_adjust_counter (countreg, 1);
11264 emit_label (label);
11265 LABEL_NUSES (label) = 1;
11269 rtx label = ix86_expand_aligntest (destreg, 2);
11270 emit_insn (gen_strsethi (destreg,
11271 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11272 ix86_adjust_counter (countreg, 2);
11273 emit_label (label);
11274 LABEL_NUSES (label) = 1;
11276 if (align <= 4 && desired_alignment > 4)
11278 rtx label = ix86_expand_aligntest (destreg, 4);
11279 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
11280 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11282 ix86_adjust_counter (countreg, 4);
11283 emit_label (label);
11284 LABEL_NUSES (label) = 1;
11287 if (label && desired_alignment > 4 && !TARGET_64BIT)
11289 emit_label (label);
11290 LABEL_NUSES (label) = 1;
11294 if (!TARGET_SINGLE_STRINGOP)
11295 emit_insn (gen_cld ());
11298 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11300 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
11301 destreg, countreg2));
11305 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
11306 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
11307 destreg, countreg2));
11311 emit_label (label);
11312 LABEL_NUSES (label) = 1;
11315 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11316 emit_insn (gen_strsetsi (destreg,
11317 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11318 if (TARGET_64BIT && (align <= 4 || count == 0))
11320 rtx label = ix86_expand_aligntest (countreg, 4);
11321 emit_insn (gen_strsetsi (destreg,
11322 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11323 emit_label (label);
11324 LABEL_NUSES (label) = 1;
11326 if (align > 2 && count != 0 && (count & 2))
11327 emit_insn (gen_strsethi (destreg,
11328 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11329 if (align <= 2 || count == 0)
11331 rtx label = ix86_expand_aligntest (countreg, 2);
11332 emit_insn (gen_strsethi (destreg,
11333 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11334 emit_label (label);
11335 LABEL_NUSES (label) = 1;
11337 if (align > 1 && count != 0 && (count & 1))
11338 emit_insn (gen_strsetqi (destreg,
11339 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11340 if (align <= 1 || count == 0)
11342 rtx label = ix86_expand_aligntest (countreg, 1);
11343 emit_insn (gen_strsetqi (destreg,
11344 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11345 emit_label (label);
11346 LABEL_NUSES (label) = 1;
11351 /* Expand strlen. */
11353 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
11355 rtx addr, scratch1, scratch2, scratch3, scratch4;
11357 /* The generic case of strlen expander is long. Avoid it's
11358 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
11360 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11361 && !TARGET_INLINE_ALL_STRINGOPS
11363 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11366 addr = force_reg (Pmode, XEXP (src, 0));
11367 scratch1 = gen_reg_rtx (Pmode);
11369 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11372 /* Well it seems that some optimizer does not combine a call like
11373 foo(strlen(bar), strlen(bar));
11374 when the move and the subtraction is done here. It does calculate
11375 the length just once when these instructions are done inside of
11376 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
11377 often used and I use one fewer register for the lifetime of
11378 output_strlen_unroll() this is better. */
11380 emit_move_insn (out, addr);
11382 ix86_expand_strlensi_unroll_1 (out, align);
11384 /* strlensi_unroll_1 returns the address of the zero at the end of
11385 the string, like memchr(), so compute the length by subtracting
11386 the start address. */
11388 emit_insn (gen_subdi3 (out, out, addr));
11390 emit_insn (gen_subsi3 (out, out, addr));
11394 scratch2 = gen_reg_rtx (Pmode);
11395 scratch3 = gen_reg_rtx (Pmode);
11396 scratch4 = force_reg (Pmode, constm1_rtx);
11398 emit_move_insn (scratch3, addr);
11399 eoschar = force_reg (QImode, eoschar);
11401 emit_insn (gen_cld ());
11404 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
11405 align, scratch4, scratch3));
11406 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11407 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11411 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
11412 align, scratch4, scratch3));
11413 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11414 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11420 /* Expand the appropriate insns for doing strlen if not just doing
11423 out = result, initialized with the start address
11424 align_rtx = alignment of the address.
11425 scratch = scratch register, initialized with the startaddress when
11426 not aligned, otherwise undefined
11428 This is just the body. It needs the initializations mentioned above and
11429 some address computing at the end. These things are done in i386.md. */
11432 ix86_expand_strlensi_unroll_1 (rtx out, rtx align_rtx)
11436 rtx align_2_label = NULL_RTX;
11437 rtx align_3_label = NULL_RTX;
11438 rtx align_4_label = gen_label_rtx ();
11439 rtx end_0_label = gen_label_rtx ();
11441 rtx tmpreg = gen_reg_rtx (SImode);
11442 rtx scratch = gen_reg_rtx (SImode);
11446 if (GET_CODE (align_rtx) == CONST_INT)
11447 align = INTVAL (align_rtx);
11449 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11451 /* Is there a known alignment and is it less than 4? */
11454 rtx scratch1 = gen_reg_rtx (Pmode);
11455 emit_move_insn (scratch1, out);
11456 /* Is there a known alignment and is it not 2? */
11459 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11460 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11462 /* Leave just the 3 lower bits. */
11463 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11464 NULL_RTX, 0, OPTAB_WIDEN);
11466 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11467 Pmode, 1, align_4_label);
11468 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
11469 Pmode, 1, align_2_label);
11470 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
11471 Pmode, 1, align_3_label);
11475 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11476 check if is aligned to 4 - byte. */
11478 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
11479 NULL_RTX, 0, OPTAB_WIDEN);
11481 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11482 Pmode, 1, align_4_label);
11485 mem = gen_rtx_MEM (QImode, out);
11487 /* Now compare the bytes. */
11489 /* Compare the first n unaligned byte on a byte per byte basis. */
11490 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11491 QImode, 1, end_0_label);
11493 /* Increment the address. */
11495 emit_insn (gen_adddi3 (out, out, const1_rtx));
11497 emit_insn (gen_addsi3 (out, out, const1_rtx));
11499 /* Not needed with an alignment of 2 */
11502 emit_label (align_2_label);
11504 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11508 emit_insn (gen_adddi3 (out, out, const1_rtx));
11510 emit_insn (gen_addsi3 (out, out, const1_rtx));
11512 emit_label (align_3_label);
11515 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11519 emit_insn (gen_adddi3 (out, out, const1_rtx));
11521 emit_insn (gen_addsi3 (out, out, const1_rtx));
11524 /* Generate loop to check 4 bytes at a time. It is not a good idea to
11525 align this loop. It gives only huge programs, but does not help to
11527 emit_label (align_4_label);
11529 mem = gen_rtx_MEM (SImode, out);
11530 emit_move_insn (scratch, mem);
11532 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11534 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11536 /* This formula yields a nonzero result iff one of the bytes is zero.
11537 This saves three branches inside loop and many cycles. */
11539 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11540 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11541 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11542 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11543 gen_int_mode (0x80808080, SImode)));
11544 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11549 rtx reg = gen_reg_rtx (SImode);
11550 rtx reg2 = gen_reg_rtx (Pmode);
11551 emit_move_insn (reg, tmpreg);
11552 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11554 /* If zero is not in the first two bytes, move two bytes forward. */
11555 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11556 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11557 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11558 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11559 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11562 /* Emit lea manually to avoid clobbering of flags. */
11563 emit_insn (gen_rtx_SET (SImode, reg2,
11564 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
11566 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11567 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11568 emit_insn (gen_rtx_SET (VOIDmode, out,
11569 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11576 rtx end_2_label = gen_label_rtx ();
11577 /* Is zero in the first two bytes? */
11579 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11580 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11581 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11582 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11583 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11585 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11586 JUMP_LABEL (tmp) = end_2_label;
11588 /* Not in the first two. Move two bytes forward. */
11589 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11591 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
11593 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
11595 emit_label (end_2_label);
11599 /* Avoid branch in fixing the byte. */
11600 tmpreg = gen_lowpart (QImode, tmpreg);
11601 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11602 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11604 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11606 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
11608 emit_label (end_0_label);
11612 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
11613 rtx callarg2 ATTRIBUTE_UNUSED,
11614 rtx pop, int sibcall)
11616 rtx use = NULL, call;
11618 if (pop == const0_rtx)
11620 if (TARGET_64BIT && pop)
11624 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11625 fnaddr = machopic_indirect_call_target (fnaddr);
11627 /* Static functions and indirect calls don't need the pic register. */
11628 if (! TARGET_64BIT && flag_pic
11629 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11630 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
11631 use_reg (&use, pic_offset_table_rtx);
11633 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11635 rtx al = gen_rtx_REG (QImode, 0);
11636 emit_move_insn (al, callarg2);
11637 use_reg (&use, al);
11639 #endif /* TARGET_MACHO */
11641 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11643 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11644 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11646 if (sibcall && TARGET_64BIT
11647 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11650 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11651 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
11652 emit_move_insn (fnaddr, addr);
11653 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11656 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11658 call = gen_rtx_SET (VOIDmode, retval, call);
11661 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11662 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11663 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11666 call = emit_call_insn (call);
11668 CALL_INSN_FUNCTION_USAGE (call) = use;
11672 /* Clear stack slot assignments remembered from previous functions.
11673 This is called from INIT_EXPANDERS once before RTL is emitted for each
11676 static struct machine_function *
11677 ix86_init_machine_status (void)
11679 struct machine_function *f;
11681 f = ggc_alloc_cleared (sizeof (struct machine_function));
11682 f->use_fast_prologue_epilogue_nregs = -1;
11687 /* Return a MEM corresponding to a stack slot with mode MODE.
11688 Allocate a new slot if necessary.
11690 The RTL for a function can have several slots available: N is
11691 which slot to use. */
11694 assign_386_stack_local (enum machine_mode mode, int n)
11696 struct stack_local_entry *s;
11698 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11701 for (s = ix86_stack_locals; s; s = s->next)
11702 if (s->mode == mode && s->n == n)
11705 s = (struct stack_local_entry *)
11706 ggc_alloc (sizeof (struct stack_local_entry));
11709 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11711 s->next = ix86_stack_locals;
11712 ix86_stack_locals = s;
11716 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11718 static GTY(()) rtx ix86_tls_symbol;
11720 ix86_tls_get_addr (void)
11723 if (!ix86_tls_symbol)
11725 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11726 (TARGET_GNU_TLS && !TARGET_64BIT)
11727 ? "___tls_get_addr"
11728 : "__tls_get_addr");
11731 return ix86_tls_symbol;
11734 /* Calculate the length of the memory address in the instruction
11735 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11738 memory_address_length (rtx addr)
11740 struct ix86_address parts;
11741 rtx base, index, disp;
11744 if (GET_CODE (addr) == PRE_DEC
11745 || GET_CODE (addr) == POST_INC
11746 || GET_CODE (addr) == PRE_MODIFY
11747 || GET_CODE (addr) == POST_MODIFY)
11750 if (! ix86_decompose_address (addr, &parts))
11754 index = parts.index;
11759 - esp as the base always wants an index,
11760 - ebp as the base always wants a displacement. */
11762 /* Register Indirect. */
11763 if (base && !index && !disp)
11765 /* esp (for its index) and ebp (for its displacement) need
11766 the two-byte modrm form. */
11767 if (addr == stack_pointer_rtx
11768 || addr == arg_pointer_rtx
11769 || addr == frame_pointer_rtx
11770 || addr == hard_frame_pointer_rtx)
11774 /* Direct Addressing. */
11775 else if (disp && !base && !index)
11780 /* Find the length of the displacement constant. */
11783 if (GET_CODE (disp) == CONST_INT
11784 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
11790 /* ebp always wants a displacement. */
11791 else if (base == hard_frame_pointer_rtx)
11794 /* An index requires the two-byte modrm form... */
11796 /* ...like esp, which always wants an index. */
11797 || base == stack_pointer_rtx
11798 || base == arg_pointer_rtx
11799 || base == frame_pointer_rtx)
11806 /* Compute default value for "length_immediate" attribute. When SHORTFORM
11807 is set, expect that insn have 8bit immediate alternative. */
11809 ix86_attr_length_immediate_default (rtx insn, int shortform)
11813 extract_insn_cached (insn);
11814 for (i = recog_data.n_operands - 1; i >= 0; --i)
11815 if (CONSTANT_P (recog_data.operand[i]))
11820 && GET_CODE (recog_data.operand[i]) == CONST_INT
11821 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11825 switch (get_attr_mode (insn))
11836 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
11841 fatal_insn ("unknown insn mode", insn);
11847 /* Compute default value for "length_address" attribute. */
11849 ix86_attr_length_address_default (rtx insn)
11853 if (get_attr_type (insn) == TYPE_LEA)
11855 rtx set = PATTERN (insn);
11856 if (GET_CODE (set) == SET)
11858 else if (GET_CODE (set) == PARALLEL
11859 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
11860 set = XVECEXP (set, 0, 0);
11863 #ifdef ENABLE_CHECKING
11869 return memory_address_length (SET_SRC (set));
11872 extract_insn_cached (insn);
11873 for (i = recog_data.n_operands - 1; i >= 0; --i)
11874 if (GET_CODE (recog_data.operand[i]) == MEM)
11876 return memory_address_length (XEXP (recog_data.operand[i], 0));
11882 /* Return the maximum number of instructions a cpu can issue. */
11885 ix86_issue_rate (void)
11889 case PROCESSOR_PENTIUM:
11893 case PROCESSOR_PENTIUMPRO:
11894 case PROCESSOR_PENTIUM4:
11895 case PROCESSOR_ATHLON:
11904 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
11905 by DEP_INSN and nothing set by DEP_INSN. */
11908 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
11912 /* Simplify the test for uninteresting insns. */
11913 if (insn_type != TYPE_SETCC
11914 && insn_type != TYPE_ICMOV
11915 && insn_type != TYPE_FCMOV
11916 && insn_type != TYPE_IBR)
11919 if ((set = single_set (dep_insn)) != 0)
11921 set = SET_DEST (set);
11924 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
11925 && XVECLEN (PATTERN (dep_insn), 0) == 2
11926 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
11927 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
11929 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11930 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11935 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11938 /* This test is true if the dependent insn reads the flags but
11939 not any other potentially set register. */
11940 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11943 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11949 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11950 address with operands set by DEP_INSN. */
11953 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
11957 if (insn_type == TYPE_LEA
11960 addr = PATTERN (insn);
11961 if (GET_CODE (addr) == SET)
11963 else if (GET_CODE (addr) == PARALLEL
11964 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11965 addr = XVECEXP (addr, 0, 0);
11968 addr = SET_SRC (addr);
11973 extract_insn_cached (insn);
11974 for (i = recog_data.n_operands - 1; i >= 0; --i)
11975 if (GET_CODE (recog_data.operand[i]) == MEM)
11977 addr = XEXP (recog_data.operand[i], 0);
11984 return modified_in_p (addr, dep_insn);
11988 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
11990 enum attr_type insn_type, dep_insn_type;
11991 enum attr_memory memory, dep_memory;
11993 int dep_insn_code_number;
11995 /* Anti and output dependencies have zero cost on all CPUs. */
11996 if (REG_NOTE_KIND (link) != 0)
11999 dep_insn_code_number = recog_memoized (dep_insn);
12001 /* If we can't recognize the insns, we can't really do anything. */
12002 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
12005 insn_type = get_attr_type (insn);
12006 dep_insn_type = get_attr_type (dep_insn);
12010 case PROCESSOR_PENTIUM:
12011 /* Address Generation Interlock adds a cycle of latency. */
12012 if (ix86_agi_dependant (insn, dep_insn, insn_type))
12015 /* ??? Compares pair with jump/setcc. */
12016 if (ix86_flags_dependant (insn, dep_insn, insn_type))
12019 /* Floating point stores require value to be ready one cycle earlier. */
12020 if (insn_type == TYPE_FMOV
12021 && get_attr_memory (insn) == MEMORY_STORE
12022 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12026 case PROCESSOR_PENTIUMPRO:
12027 memory = get_attr_memory (insn);
12028 dep_memory = get_attr_memory (dep_insn);
12030 /* Since we can't represent delayed latencies of load+operation,
12031 increase the cost here for non-imov insns. */
12032 if (dep_insn_type != TYPE_IMOV
12033 && dep_insn_type != TYPE_FMOV
12034 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
12037 /* INT->FP conversion is expensive. */
12038 if (get_attr_fp_int_src (dep_insn))
12041 /* There is one cycle extra latency between an FP op and a store. */
12042 if (insn_type == TYPE_FMOV
12043 && (set = single_set (dep_insn)) != NULL_RTX
12044 && (set2 = single_set (insn)) != NULL_RTX
12045 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
12046 && GET_CODE (SET_DEST (set2)) == MEM)
12049 /* Show ability of reorder buffer to hide latency of load by executing
12050 in parallel with previous instruction in case
12051 previous instruction is not needed to compute the address. */
12052 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12053 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12055 /* Claim moves to take one cycle, as core can issue one load
12056 at time and the next load can start cycle later. */
12057 if (dep_insn_type == TYPE_IMOV
12058 || dep_insn_type == TYPE_FMOV)
12066 memory = get_attr_memory (insn);
12067 dep_memory = get_attr_memory (dep_insn);
12068 /* The esp dependency is resolved before the instruction is really
12070 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
12071 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
12074 /* Since we can't represent delayed latencies of load+operation,
12075 increase the cost here for non-imov insns. */
12076 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
12077 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
12079 /* INT->FP conversion is expensive. */
12080 if (get_attr_fp_int_src (dep_insn))
12083 /* Show ability of reorder buffer to hide latency of load by executing
12084 in parallel with previous instruction in case
12085 previous instruction is not needed to compute the address. */
12086 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12087 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12089 /* Claim moves to take one cycle, as core can issue one load
12090 at time and the next load can start cycle later. */
12091 if (dep_insn_type == TYPE_IMOV
12092 || dep_insn_type == TYPE_FMOV)
12101 case PROCESSOR_ATHLON:
12103 memory = get_attr_memory (insn);
12104 dep_memory = get_attr_memory (dep_insn);
12106 /* Show ability of reorder buffer to hide latency of load by executing
12107 in parallel with previous instruction in case
12108 previous instruction is not needed to compute the address. */
12109 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12110 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12112 enum attr_unit unit = get_attr_unit (insn);
12115 /* Because of the difference between the length of integer and
12116 floating unit pipeline preparation stages, the memory operands
12117 for floating point are cheaper.
12119 ??? For Athlon it the difference is most probably 2. */
12120 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
12123 loadcost = TARGET_ATHLON ? 2 : 0;
12125 if (cost >= loadcost)
12140 struct ppro_sched_data
12143 int issued_this_cycle;
12147 static enum attr_ppro_uops
12148 ix86_safe_ppro_uops (rtx insn)
12150 if (recog_memoized (insn) >= 0)
12151 return get_attr_ppro_uops (insn);
12153 return PPRO_UOPS_MANY;
12157 ix86_dump_ppro_packet (FILE *dump)
12159 if (ix86_sched_data.ppro.decode[0])
12161 fprintf (dump, "PPRO packet: %d",
12162 INSN_UID (ix86_sched_data.ppro.decode[0]));
12163 if (ix86_sched_data.ppro.decode[1])
12164 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
12165 if (ix86_sched_data.ppro.decode[2])
12166 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
12167 fputc ('\n', dump);
12171 /* We're beginning a new block. Initialize data structures as necessary. */
12174 ix86_sched_init (FILE *dump ATTRIBUTE_UNUSED,
12175 int sched_verbose ATTRIBUTE_UNUSED,
12176 int veclen ATTRIBUTE_UNUSED)
12178 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
12181 /* Shift INSN to SLOT, and shift everything else down. */
12184 ix86_reorder_insn (rtx *insnp, rtx *slot)
12190 insnp[0] = insnp[1];
12191 while (++insnp != slot);
12197 ix86_sched_reorder_ppro (rtx *ready, rtx *e_ready)
12200 enum attr_ppro_uops cur_uops;
12201 int issued_this_cycle;
12205 /* At this point .ppro.decode contains the state of the three
12206 decoders from last "cycle". That is, those insns that were
12207 actually independent. But here we're scheduling for the
12208 decoder, and we may find things that are decodable in the
12211 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
12212 issued_this_cycle = 0;
12215 cur_uops = ix86_safe_ppro_uops (*insnp);
12217 /* If the decoders are empty, and we've a complex insn at the
12218 head of the priority queue, let it issue without complaint. */
12219 if (decode[0] == NULL)
12221 if (cur_uops == PPRO_UOPS_MANY)
12223 decode[0] = *insnp;
12227 /* Otherwise, search for a 2-4 uop unsn to issue. */
12228 while (cur_uops != PPRO_UOPS_FEW)
12230 if (insnp == ready)
12232 cur_uops = ix86_safe_ppro_uops (*--insnp);
12235 /* If so, move it to the head of the line. */
12236 if (cur_uops == PPRO_UOPS_FEW)
12237 ix86_reorder_insn (insnp, e_ready);
12239 /* Issue the head of the queue. */
12240 issued_this_cycle = 1;
12241 decode[0] = *e_ready--;
12244 /* Look for simple insns to fill in the other two slots. */
12245 for (i = 1; i < 3; ++i)
12246 if (decode[i] == NULL)
12248 if (ready > e_ready)
12252 cur_uops = ix86_safe_ppro_uops (*insnp);
12253 while (cur_uops != PPRO_UOPS_ONE)
12255 if (insnp == ready)
12257 cur_uops = ix86_safe_ppro_uops (*--insnp);
12260 /* Found one. Move it to the head of the queue and issue it. */
12261 if (cur_uops == PPRO_UOPS_ONE)
12263 ix86_reorder_insn (insnp, e_ready);
12264 decode[i] = *e_ready--;
12265 issued_this_cycle++;
12269 /* ??? Didn't find one. Ideally, here we would do a lazy split
12270 of 2-uop insns, issue one and queue the other. */
12274 if (issued_this_cycle == 0)
12275 issued_this_cycle = 1;
12276 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
12279 /* We are about to being issuing insns for this clock cycle.
12280 Override the default sort algorithm to better slot instructions. */
12282 ix86_sched_reorder (FILE *dump ATTRIBUTE_UNUSED,
12283 int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
12284 int *n_readyp, int clock_var ATTRIBUTE_UNUSED)
12286 int n_ready = *n_readyp;
12287 rtx *e_ready = ready + n_ready - 1;
12289 /* Make sure to go ahead and initialize key items in
12290 ix86_sched_data if we are not going to bother trying to
12291 reorder the ready queue. */
12294 ix86_sched_data.ppro.issued_this_cycle = 1;
12303 case PROCESSOR_PENTIUMPRO:
12304 ix86_sched_reorder_ppro (ready, e_ready);
12309 return ix86_issue_rate ();
12312 /* We are about to issue INSN. Return the number of insns left on the
12313 ready queue that can be issued this cycle. */
12316 ix86_variable_issue (FILE *dump, int sched_verbose, rtx insn,
12317 int can_issue_more)
12323 return can_issue_more - 1;
12325 case PROCESSOR_PENTIUMPRO:
12327 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
12329 if (uops == PPRO_UOPS_MANY)
12332 ix86_dump_ppro_packet (dump);
12333 ix86_sched_data.ppro.decode[0] = insn;
12334 ix86_sched_data.ppro.decode[1] = NULL;
12335 ix86_sched_data.ppro.decode[2] = NULL;
12337 ix86_dump_ppro_packet (dump);
12338 ix86_sched_data.ppro.decode[0] = NULL;
12340 else if (uops == PPRO_UOPS_FEW)
12343 ix86_dump_ppro_packet (dump);
12344 ix86_sched_data.ppro.decode[0] = insn;
12345 ix86_sched_data.ppro.decode[1] = NULL;
12346 ix86_sched_data.ppro.decode[2] = NULL;
12350 for (i = 0; i < 3; ++i)
12351 if (ix86_sched_data.ppro.decode[i] == NULL)
12353 ix86_sched_data.ppro.decode[i] = insn;
12361 ix86_dump_ppro_packet (dump);
12362 ix86_sched_data.ppro.decode[0] = NULL;
12363 ix86_sched_data.ppro.decode[1] = NULL;
12364 ix86_sched_data.ppro.decode[2] = NULL;
12368 return --ix86_sched_data.ppro.issued_this_cycle;
12373 ia32_use_dfa_pipeline_interface (void)
12375 if (TARGET_PENTIUM || TARGET_ATHLON_K8)
12380 /* How many alternative schedules to try. This should be as wide as the
12381 scheduling freedom in the DFA, but no wider. Making this value too
12382 large results extra work for the scheduler. */
12385 ia32_multipass_dfa_lookahead (void)
12387 if (ix86_tune == PROCESSOR_PENTIUM)
12394 /* Walk through INSNS and look for MEM references whose address is DSTREG or
12395 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
12399 ix86_set_move_mem_attrs (rtx insns, rtx dstref, rtx srcref, rtx dstreg,
12404 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
12406 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
12410 /* Subroutine of above to actually do the updating by recursively walking
12414 ix86_set_move_mem_attrs_1 (rtx x, rtx dstref, rtx srcref, rtx dstreg,
12417 enum rtx_code code = GET_CODE (x);
12418 const char *format_ptr = GET_RTX_FORMAT (code);
12421 if (code == MEM && XEXP (x, 0) == dstreg)
12422 MEM_COPY_ATTRIBUTES (x, dstref);
12423 else if (code == MEM && XEXP (x, 0) == srcreg)
12424 MEM_COPY_ATTRIBUTES (x, srcref);
12426 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
12428 if (*format_ptr == 'e')
12429 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
12431 else if (*format_ptr == 'E')
12432 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12433 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
12438 /* Compute the alignment given to a constant that is being placed in memory.
12439 EXP is the constant and ALIGN is the alignment that the object would
12441 The value of this function is used instead of that alignment to align
12445 ix86_constant_alignment (tree exp, int align)
12447 if (TREE_CODE (exp) == REAL_CST)
12449 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12451 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12454 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
12461 /* Compute the alignment for a static variable.
12462 TYPE is the data type, and ALIGN is the alignment that
12463 the object would ordinarily have. The value of this function is used
12464 instead of that alignment to align the object. */
12467 ix86_data_alignment (tree type, int align)
12469 if (AGGREGATE_TYPE_P (type)
12470 && TYPE_SIZE (type)
12471 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12472 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12473 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12476 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12477 to 16byte boundary. */
12480 if (AGGREGATE_TYPE_P (type)
12481 && TYPE_SIZE (type)
12482 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12483 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12484 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12488 if (TREE_CODE (type) == ARRAY_TYPE)
12490 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12492 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12495 else if (TREE_CODE (type) == COMPLEX_TYPE)
12498 if (TYPE_MODE (type) == DCmode && align < 64)
12500 if (TYPE_MODE (type) == XCmode && align < 128)
12503 else if ((TREE_CODE (type) == RECORD_TYPE
12504 || TREE_CODE (type) == UNION_TYPE
12505 || TREE_CODE (type) == QUAL_UNION_TYPE)
12506 && TYPE_FIELDS (type))
12508 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12510 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12513 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12514 || TREE_CODE (type) == INTEGER_TYPE)
12516 if (TYPE_MODE (type) == DFmode && align < 64)
12518 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12525 /* Compute the alignment for a local variable.
12526 TYPE is the data type, and ALIGN is the alignment that
12527 the object would ordinarily have. The value of this macro is used
12528 instead of that alignment to align the object. */
12531 ix86_local_alignment (tree type, int align)
12533 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12534 to 16byte boundary. */
12537 if (AGGREGATE_TYPE_P (type)
12538 && TYPE_SIZE (type)
12539 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12540 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12541 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12544 if (TREE_CODE (type) == ARRAY_TYPE)
12546 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12548 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12551 else if (TREE_CODE (type) == COMPLEX_TYPE)
12553 if (TYPE_MODE (type) == DCmode && align < 64)
12555 if (TYPE_MODE (type) == XCmode && align < 128)
12558 else if ((TREE_CODE (type) == RECORD_TYPE
12559 || TREE_CODE (type) == UNION_TYPE
12560 || TREE_CODE (type) == QUAL_UNION_TYPE)
12561 && TYPE_FIELDS (type))
12563 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12565 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12568 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12569 || TREE_CODE (type) == INTEGER_TYPE)
12572 if (TYPE_MODE (type) == DFmode && align < 64)
12574 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12580 /* Emit RTL insns to initialize the variable parts of a trampoline.
12581 FNADDR is an RTX for the address of the function's pure code.
12582 CXT is an RTX for the static chain value for the function. */
12584 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
12588 /* Compute offset from the end of the jmp to the target function. */
12589 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12590 plus_constant (tramp, 10),
12591 NULL_RTX, 1, OPTAB_DIRECT);
12592 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12593 gen_int_mode (0xb9, QImode));
12594 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12595 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12596 gen_int_mode (0xe9, QImode));
12597 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12602 /* Try to load address using shorter movl instead of movabs.
12603 We may want to support movq for kernel mode, but kernel does not use
12604 trampolines at the moment. */
12605 if (x86_64_zero_extended_value (fnaddr))
12607 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12608 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12609 gen_int_mode (0xbb41, HImode));
12610 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12611 gen_lowpart (SImode, fnaddr));
12616 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12617 gen_int_mode (0xbb49, HImode));
12618 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12622 /* Load static chain using movabs to r10. */
12623 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12624 gen_int_mode (0xba49, HImode));
12625 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12628 /* Jump to the r11 */
12629 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12630 gen_int_mode (0xff49, HImode));
12631 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12632 gen_int_mode (0xe3, QImode));
12634 if (offset > TRAMPOLINE_SIZE)
12638 #ifdef TRANSFER_FROM_TRAMPOLINE
12639 emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
12640 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12644 #define def_builtin(MASK, NAME, TYPE, CODE) \
12646 if ((MASK) & target_flags \
12647 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
12648 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12649 NULL, NULL_TREE); \
12652 struct builtin_description
12654 const unsigned int mask;
12655 const enum insn_code icode;
12656 const char *const name;
12657 const enum ix86_builtins code;
12658 const enum rtx_code comparison;
12659 const unsigned int flag;
12662 static const struct builtin_description bdesc_comi[] =
12664 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12665 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12666 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12667 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12668 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12669 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12670 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12671 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12672 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12673 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12674 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12675 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12676 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12677 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12678 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12679 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12680 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12681 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12682 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12683 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12684 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12685 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12686 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12687 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12690 static const struct builtin_description bdesc_2arg[] =
12693 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12694 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12695 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12696 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12697 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12698 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12699 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12700 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12702 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12703 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12704 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12705 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12706 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12707 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12708 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12709 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12710 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12711 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12712 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12713 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12714 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12715 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12716 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12717 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12718 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12719 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12720 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12721 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12723 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12724 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12725 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12726 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12728 { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12729 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12730 { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12731 { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12733 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12734 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12735 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12736 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12737 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12740 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12741 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12742 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12743 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
12744 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12745 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12746 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12747 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
12749 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12750 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12751 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12752 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12753 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12754 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12755 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12756 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12758 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12759 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12760 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12762 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12763 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12764 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12765 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12767 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12768 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12770 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12771 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12772 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12773 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12774 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12775 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12777 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12778 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12779 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12780 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12782 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12783 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12784 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12785 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12786 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12787 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12790 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12791 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12792 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12794 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12795 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12796 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
12798 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12799 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12800 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12801 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12802 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12803 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12805 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12806 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12807 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12808 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12809 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12810 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12812 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12813 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12814 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12815 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12817 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12818 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12821 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12822 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12823 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12824 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12825 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12826 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12827 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12828 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12830 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12831 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12832 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12833 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12834 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12835 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12836 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12837 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12838 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12839 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12840 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12841 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12842 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12843 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12844 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12845 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12846 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12847 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12848 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12849 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12851 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12852 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12853 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12854 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12856 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12857 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12858 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12859 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12861 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12862 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12863 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12866 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12867 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12868 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12869 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12870 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12871 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12872 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12873 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12875 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12876 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12877 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12878 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12879 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12880 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12881 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12882 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12884 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12885 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12886 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
12887 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
12889 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12890 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12891 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12892 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
12894 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12895 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12897 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12898 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12899 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12900 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12901 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12902 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12904 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12905 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12906 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12907 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12909 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12910 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12911 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
12912 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
12913 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12914 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12915 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12916 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
12918 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12919 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12920 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12922 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12923 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12925 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12926 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12927 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12928 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12929 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12930 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12932 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12933 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12934 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12935 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12936 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12937 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12939 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12940 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12941 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12942 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12944 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12946 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12947 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
12948 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12949 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
12952 { MASK_PNI, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
12953 { MASK_PNI, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
12954 { MASK_PNI, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
12955 { MASK_PNI, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
12956 { MASK_PNI, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
12957 { MASK_PNI, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
12960 static const struct builtin_description bdesc_1arg[] =
12962 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12963 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12965 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12966 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12967 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12969 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12970 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12971 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
12972 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12973 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12974 { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
12976 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12977 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12978 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
12979 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
12981 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12983 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12984 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
12986 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12987 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12988 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12989 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12990 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
12992 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
12994 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12995 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12996 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
12997 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
12999 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
13000 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
13001 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
13003 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
13006 { MASK_PNI, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
13007 { MASK_PNI, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
13008 { MASK_PNI, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
13012 ix86_init_builtins (void)
13015 ix86_init_mmx_sse_builtins ();
13018 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
13019 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
13022 ix86_init_mmx_sse_builtins (void)
13024 const struct builtin_description * d;
13027 tree pchar_type_node = build_pointer_type (char_type_node);
13028 tree pcchar_type_node = build_pointer_type (
13029 build_type_variant (char_type_node, 1, 0));
13030 tree pfloat_type_node = build_pointer_type (float_type_node);
13031 tree pcfloat_type_node = build_pointer_type (
13032 build_type_variant (float_type_node, 1, 0));
13033 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
13034 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
13035 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
13038 tree int_ftype_v4sf_v4sf
13039 = build_function_type_list (integer_type_node,
13040 V4SF_type_node, V4SF_type_node, NULL_TREE);
13041 tree v4si_ftype_v4sf_v4sf
13042 = build_function_type_list (V4SI_type_node,
13043 V4SF_type_node, V4SF_type_node, NULL_TREE);
13044 /* MMX/SSE/integer conversions. */
13045 tree int_ftype_v4sf
13046 = build_function_type_list (integer_type_node,
13047 V4SF_type_node, NULL_TREE);
13048 tree int64_ftype_v4sf
13049 = build_function_type_list (long_long_integer_type_node,
13050 V4SF_type_node, NULL_TREE);
13051 tree int_ftype_v8qi
13052 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
13053 tree v4sf_ftype_v4sf_int
13054 = build_function_type_list (V4SF_type_node,
13055 V4SF_type_node, integer_type_node, NULL_TREE);
13056 tree v4sf_ftype_v4sf_int64
13057 = build_function_type_list (V4SF_type_node,
13058 V4SF_type_node, long_long_integer_type_node,
13060 tree v4sf_ftype_v4sf_v2si
13061 = build_function_type_list (V4SF_type_node,
13062 V4SF_type_node, V2SI_type_node, NULL_TREE);
13063 tree int_ftype_v4hi_int
13064 = build_function_type_list (integer_type_node,
13065 V4HI_type_node, integer_type_node, NULL_TREE);
13066 tree v4hi_ftype_v4hi_int_int
13067 = build_function_type_list (V4HI_type_node, V4HI_type_node,
13068 integer_type_node, integer_type_node,
13070 /* Miscellaneous. */
13071 tree v8qi_ftype_v4hi_v4hi
13072 = build_function_type_list (V8QI_type_node,
13073 V4HI_type_node, V4HI_type_node, NULL_TREE);
13074 tree v4hi_ftype_v2si_v2si
13075 = build_function_type_list (V4HI_type_node,
13076 V2SI_type_node, V2SI_type_node, NULL_TREE);
13077 tree v4sf_ftype_v4sf_v4sf_int
13078 = build_function_type_list (V4SF_type_node,
13079 V4SF_type_node, V4SF_type_node,
13080 integer_type_node, NULL_TREE);
13081 tree v2si_ftype_v4hi_v4hi
13082 = build_function_type_list (V2SI_type_node,
13083 V4HI_type_node, V4HI_type_node, NULL_TREE);
13084 tree v4hi_ftype_v4hi_int
13085 = build_function_type_list (V4HI_type_node,
13086 V4HI_type_node, integer_type_node, NULL_TREE);
13087 tree v4hi_ftype_v4hi_di
13088 = build_function_type_list (V4HI_type_node,
13089 V4HI_type_node, long_long_unsigned_type_node,
13091 tree v2si_ftype_v2si_di
13092 = build_function_type_list (V2SI_type_node,
13093 V2SI_type_node, long_long_unsigned_type_node,
13095 tree void_ftype_void
13096 = build_function_type (void_type_node, void_list_node);
13097 tree void_ftype_unsigned
13098 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
13099 tree void_ftype_unsigned_unsigned
13100 = build_function_type_list (void_type_node, unsigned_type_node,
13101 unsigned_type_node, NULL_TREE);
13102 tree void_ftype_pcvoid_unsigned_unsigned
13103 = build_function_type_list (void_type_node, const_ptr_type_node,
13104 unsigned_type_node, unsigned_type_node,
13106 tree unsigned_ftype_void
13107 = build_function_type (unsigned_type_node, void_list_node);
13109 = build_function_type (long_long_unsigned_type_node, void_list_node);
13110 tree v4sf_ftype_void
13111 = build_function_type (V4SF_type_node, void_list_node);
13112 tree v2si_ftype_v4sf
13113 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
13114 /* Loads/stores. */
13115 tree void_ftype_v8qi_v8qi_pchar
13116 = build_function_type_list (void_type_node,
13117 V8QI_type_node, V8QI_type_node,
13118 pchar_type_node, NULL_TREE);
13119 tree v4sf_ftype_pcfloat
13120 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
13121 /* @@@ the type is bogus */
13122 tree v4sf_ftype_v4sf_pv2si
13123 = build_function_type_list (V4SF_type_node,
13124 V4SF_type_node, pv2si_type_node, NULL_TREE);
13125 tree void_ftype_pv2si_v4sf
13126 = build_function_type_list (void_type_node,
13127 pv2si_type_node, V4SF_type_node, NULL_TREE);
13128 tree void_ftype_pfloat_v4sf
13129 = build_function_type_list (void_type_node,
13130 pfloat_type_node, V4SF_type_node, NULL_TREE);
13131 tree void_ftype_pdi_di
13132 = build_function_type_list (void_type_node,
13133 pdi_type_node, long_long_unsigned_type_node,
13135 tree void_ftype_pv2di_v2di
13136 = build_function_type_list (void_type_node,
13137 pv2di_type_node, V2DI_type_node, NULL_TREE);
13138 /* Normal vector unops. */
13139 tree v4sf_ftype_v4sf
13140 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
13142 /* Normal vector binops. */
13143 tree v4sf_ftype_v4sf_v4sf
13144 = build_function_type_list (V4SF_type_node,
13145 V4SF_type_node, V4SF_type_node, NULL_TREE);
13146 tree v8qi_ftype_v8qi_v8qi
13147 = build_function_type_list (V8QI_type_node,
13148 V8QI_type_node, V8QI_type_node, NULL_TREE);
13149 tree v4hi_ftype_v4hi_v4hi
13150 = build_function_type_list (V4HI_type_node,
13151 V4HI_type_node, V4HI_type_node, NULL_TREE);
13152 tree v2si_ftype_v2si_v2si
13153 = build_function_type_list (V2SI_type_node,
13154 V2SI_type_node, V2SI_type_node, NULL_TREE);
13155 tree di_ftype_di_di
13156 = build_function_type_list (long_long_unsigned_type_node,
13157 long_long_unsigned_type_node,
13158 long_long_unsigned_type_node, NULL_TREE);
13160 tree v2si_ftype_v2sf
13161 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
13162 tree v2sf_ftype_v2si
13163 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
13164 tree v2si_ftype_v2si
13165 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
13166 tree v2sf_ftype_v2sf
13167 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
13168 tree v2sf_ftype_v2sf_v2sf
13169 = build_function_type_list (V2SF_type_node,
13170 V2SF_type_node, V2SF_type_node, NULL_TREE);
13171 tree v2si_ftype_v2sf_v2sf
13172 = build_function_type_list (V2SI_type_node,
13173 V2SF_type_node, V2SF_type_node, NULL_TREE);
13174 tree pint_type_node = build_pointer_type (integer_type_node);
13175 tree pcint_type_node = build_pointer_type (
13176 build_type_variant (integer_type_node, 1, 0));
13177 tree pdouble_type_node = build_pointer_type (double_type_node);
13178 tree pcdouble_type_node = build_pointer_type (
13179 build_type_variant (double_type_node, 1, 0));
13180 tree int_ftype_v2df_v2df
13181 = build_function_type_list (integer_type_node,
13182 V2DF_type_node, V2DF_type_node, NULL_TREE);
13185 = build_function_type (intTI_type_node, void_list_node);
13186 tree v2di_ftype_void
13187 = build_function_type (V2DI_type_node, void_list_node);
13188 tree ti_ftype_ti_ti
13189 = build_function_type_list (intTI_type_node,
13190 intTI_type_node, intTI_type_node, NULL_TREE);
13191 tree void_ftype_pcvoid
13192 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
13194 = build_function_type_list (V2DI_type_node,
13195 long_long_unsigned_type_node, NULL_TREE);
13197 = build_function_type_list (long_long_unsigned_type_node,
13198 V2DI_type_node, NULL_TREE);
13199 tree v4sf_ftype_v4si
13200 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
13201 tree v4si_ftype_v4sf
13202 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
13203 tree v2df_ftype_v4si
13204 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
13205 tree v4si_ftype_v2df
13206 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
13207 tree v2si_ftype_v2df
13208 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
13209 tree v4sf_ftype_v2df
13210 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
13211 tree v2df_ftype_v2si
13212 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
13213 tree v2df_ftype_v4sf
13214 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
13215 tree int_ftype_v2df
13216 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
13217 tree int64_ftype_v2df
13218 = build_function_type_list (long_long_integer_type_node,
13219 V2DF_type_node, NULL_TREE);
13220 tree v2df_ftype_v2df_int
13221 = build_function_type_list (V2DF_type_node,
13222 V2DF_type_node, integer_type_node, NULL_TREE);
13223 tree v2df_ftype_v2df_int64
13224 = build_function_type_list (V2DF_type_node,
13225 V2DF_type_node, long_long_integer_type_node,
13227 tree v4sf_ftype_v4sf_v2df
13228 = build_function_type_list (V4SF_type_node,
13229 V4SF_type_node, V2DF_type_node, NULL_TREE);
13230 tree v2df_ftype_v2df_v4sf
13231 = build_function_type_list (V2DF_type_node,
13232 V2DF_type_node, V4SF_type_node, NULL_TREE);
13233 tree v2df_ftype_v2df_v2df_int
13234 = build_function_type_list (V2DF_type_node,
13235 V2DF_type_node, V2DF_type_node,
13238 tree v2df_ftype_v2df_pv2si
13239 = build_function_type_list (V2DF_type_node,
13240 V2DF_type_node, pv2si_type_node, NULL_TREE);
13241 tree void_ftype_pv2si_v2df
13242 = build_function_type_list (void_type_node,
13243 pv2si_type_node, V2DF_type_node, NULL_TREE);
13244 tree void_ftype_pdouble_v2df
13245 = build_function_type_list (void_type_node,
13246 pdouble_type_node, V2DF_type_node, NULL_TREE);
13247 tree void_ftype_pint_int
13248 = build_function_type_list (void_type_node,
13249 pint_type_node, integer_type_node, NULL_TREE);
13250 tree void_ftype_v16qi_v16qi_pchar
13251 = build_function_type_list (void_type_node,
13252 V16QI_type_node, V16QI_type_node,
13253 pchar_type_node, NULL_TREE);
13254 tree v2df_ftype_pcdouble
13255 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
13256 tree v2df_ftype_v2df_v2df
13257 = build_function_type_list (V2DF_type_node,
13258 V2DF_type_node, V2DF_type_node, NULL_TREE);
13259 tree v16qi_ftype_v16qi_v16qi
13260 = build_function_type_list (V16QI_type_node,
13261 V16QI_type_node, V16QI_type_node, NULL_TREE);
13262 tree v8hi_ftype_v8hi_v8hi
13263 = build_function_type_list (V8HI_type_node,
13264 V8HI_type_node, V8HI_type_node, NULL_TREE);
13265 tree v4si_ftype_v4si_v4si
13266 = build_function_type_list (V4SI_type_node,
13267 V4SI_type_node, V4SI_type_node, NULL_TREE);
13268 tree v2di_ftype_v2di_v2di
13269 = build_function_type_list (V2DI_type_node,
13270 V2DI_type_node, V2DI_type_node, NULL_TREE);
13271 tree v2di_ftype_v2df_v2df
13272 = build_function_type_list (V2DI_type_node,
13273 V2DF_type_node, V2DF_type_node, NULL_TREE);
13274 tree v2df_ftype_v2df
13275 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13276 tree v2df_ftype_double
13277 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
13278 tree v2df_ftype_double_double
13279 = build_function_type_list (V2DF_type_node,
13280 double_type_node, double_type_node, NULL_TREE);
13281 tree int_ftype_v8hi_int
13282 = build_function_type_list (integer_type_node,
13283 V8HI_type_node, integer_type_node, NULL_TREE);
13284 tree v8hi_ftype_v8hi_int_int
13285 = build_function_type_list (V8HI_type_node,
13286 V8HI_type_node, integer_type_node,
13287 integer_type_node, NULL_TREE);
13288 tree v2di_ftype_v2di_int
13289 = build_function_type_list (V2DI_type_node,
13290 V2DI_type_node, integer_type_node, NULL_TREE);
13291 tree v4si_ftype_v4si_int
13292 = build_function_type_list (V4SI_type_node,
13293 V4SI_type_node, integer_type_node, NULL_TREE);
13294 tree v8hi_ftype_v8hi_int
13295 = build_function_type_list (V8HI_type_node,
13296 V8HI_type_node, integer_type_node, NULL_TREE);
13297 tree v8hi_ftype_v8hi_v2di
13298 = build_function_type_list (V8HI_type_node,
13299 V8HI_type_node, V2DI_type_node, NULL_TREE);
13300 tree v4si_ftype_v4si_v2di
13301 = build_function_type_list (V4SI_type_node,
13302 V4SI_type_node, V2DI_type_node, NULL_TREE);
13303 tree v4si_ftype_v8hi_v8hi
13304 = build_function_type_list (V4SI_type_node,
13305 V8HI_type_node, V8HI_type_node, NULL_TREE);
13306 tree di_ftype_v8qi_v8qi
13307 = build_function_type_list (long_long_unsigned_type_node,
13308 V8QI_type_node, V8QI_type_node, NULL_TREE);
13309 tree v2di_ftype_v16qi_v16qi
13310 = build_function_type_list (V2DI_type_node,
13311 V16QI_type_node, V16QI_type_node, NULL_TREE);
13312 tree int_ftype_v16qi
13313 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
13314 tree v16qi_ftype_pcchar
13315 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
13316 tree void_ftype_pchar_v16qi
13317 = build_function_type_list (void_type_node,
13318 pchar_type_node, V16QI_type_node, NULL_TREE);
13319 tree v4si_ftype_pcint
13320 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
13321 tree void_ftype_pcint_v4si
13322 = build_function_type_list (void_type_node,
13323 pcint_type_node, V4SI_type_node, NULL_TREE);
13324 tree v2di_ftype_v2di
13325 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
13328 tree float128_type;
13330 /* The __float80 type. */
13331 if (TYPE_MODE (long_double_type_node) == XFmode)
13332 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
13336 /* The __float80 type. */
13337 float80_type = make_node (REAL_TYPE);
13338 TYPE_PRECISION (float80_type) = 96;
13339 layout_type (float80_type);
13340 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
13343 float128_type = make_node (REAL_TYPE);
13344 TYPE_PRECISION (float128_type) = 128;
13345 layout_type (float128_type);
13346 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
13348 /* Add all builtins that are more or less simple operations on two
13350 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13352 /* Use one of the operands; the target can have a different mode for
13353 mask-generating compares. */
13354 enum machine_mode mode;
13359 mode = insn_data[d->icode].operand[1].mode;
13364 type = v16qi_ftype_v16qi_v16qi;
13367 type = v8hi_ftype_v8hi_v8hi;
13370 type = v4si_ftype_v4si_v4si;
13373 type = v2di_ftype_v2di_v2di;
13376 type = v2df_ftype_v2df_v2df;
13379 type = ti_ftype_ti_ti;
13382 type = v4sf_ftype_v4sf_v4sf;
13385 type = v8qi_ftype_v8qi_v8qi;
13388 type = v4hi_ftype_v4hi_v4hi;
13391 type = v2si_ftype_v2si_v2si;
13394 type = di_ftype_di_di;
13401 /* Override for comparisons. */
13402 if (d->icode == CODE_FOR_maskcmpv4sf3
13403 || d->icode == CODE_FOR_maskncmpv4sf3
13404 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13405 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
13406 type = v4si_ftype_v4sf_v4sf;
13408 if (d->icode == CODE_FOR_maskcmpv2df3
13409 || d->icode == CODE_FOR_maskncmpv2df3
13410 || d->icode == CODE_FOR_vmmaskcmpv2df3
13411 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13412 type = v2di_ftype_v2df_v2df;
13414 def_builtin (d->mask, d->name, type, d->code);
13417 /* Add the remaining MMX insns with somewhat more complicated types. */
13418 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13419 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13420 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13421 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13422 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13424 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13425 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13426 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13428 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13429 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13431 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13432 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
13434 /* comi/ucomi insns. */
13435 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13436 if (d->mask == MASK_SSE2)
13437 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13439 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
13441 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13442 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13443 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13445 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13446 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13447 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13448 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13449 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13450 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
13451 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13452 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
13453 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13454 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13455 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
13457 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13458 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13460 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13462 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13463 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13464 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13465 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13466 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13467 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13469 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13470 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13471 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13472 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13474 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13475 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13476 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13477 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13479 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13481 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13483 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13484 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13485 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13486 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13487 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13488 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13490 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13492 /* Original 3DNow! */
13493 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13494 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13495 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13496 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13497 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13498 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13499 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13500 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13501 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13502 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13503 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13504 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13505 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13506 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13507 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13508 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13509 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13510 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13511 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13512 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13514 /* 3DNow! extension as used in the Athlon CPU. */
13515 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13516 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13517 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13518 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13519 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13520 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13522 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13525 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13526 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13528 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13529 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13530 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13532 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13533 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13534 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
13535 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13536 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13537 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13539 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13540 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13541 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13542 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13544 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13545 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13546 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13547 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13548 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13550 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13551 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13552 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13553 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13555 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13556 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13558 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13560 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13561 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13563 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13564 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13565 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13566 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13567 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13569 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13571 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13572 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13573 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13574 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
13576 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13577 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13578 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13580 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13581 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
13582 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13583 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13585 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13586 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13587 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13588 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13589 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13590 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13591 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13593 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13594 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13595 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13597 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13598 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13599 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13600 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13601 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13602 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13603 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13605 def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13607 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13608 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13609 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13611 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13612 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13613 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13615 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13616 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13618 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13619 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13620 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13621 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13623 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13624 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13625 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13626 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13628 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13629 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13631 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13633 /* Prescott New Instructions. */
13634 def_builtin (MASK_PNI, "__builtin_ia32_monitor",
13635 void_ftype_pcvoid_unsigned_unsigned,
13636 IX86_BUILTIN_MONITOR);
13637 def_builtin (MASK_PNI, "__builtin_ia32_mwait",
13638 void_ftype_unsigned_unsigned,
13639 IX86_BUILTIN_MWAIT);
13640 def_builtin (MASK_PNI, "__builtin_ia32_movshdup",
13642 IX86_BUILTIN_MOVSHDUP);
13643 def_builtin (MASK_PNI, "__builtin_ia32_movsldup",
13645 IX86_BUILTIN_MOVSLDUP);
13646 def_builtin (MASK_PNI, "__builtin_ia32_lddqu",
13647 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
13648 def_builtin (MASK_PNI, "__builtin_ia32_loadddup",
13649 v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
13650 def_builtin (MASK_PNI, "__builtin_ia32_movddup",
13651 v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
13654 /* Errors in the source file can cause expand_expr to return const0_rtx
13655 where we expect a vector. To avoid crashing, use one of the vector
13656 clear instructions. */
13658 safe_vector_operand (rtx x, enum machine_mode mode)
13660 if (x != const0_rtx)
13662 x = gen_reg_rtx (mode);
13664 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13665 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13666 : gen_rtx_SUBREG (DImode, x, 0)));
13668 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13669 : gen_rtx_SUBREG (V4SFmode, x, 0),
13670 CONST0_RTX (V4SFmode)));
13674 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
13677 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
13680 tree arg0 = TREE_VALUE (arglist);
13681 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13682 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13683 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13684 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13685 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13686 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13688 if (VECTOR_MODE_P (mode0))
13689 op0 = safe_vector_operand (op0, mode0);
13690 if (VECTOR_MODE_P (mode1))
13691 op1 = safe_vector_operand (op1, mode1);
13694 || GET_MODE (target) != tmode
13695 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13696 target = gen_reg_rtx (tmode);
13698 if (GET_MODE (op1) == SImode && mode1 == TImode)
13700 rtx x = gen_reg_rtx (V4SImode);
13701 emit_insn (gen_sse2_loadd (x, op1));
13702 op1 = gen_lowpart (TImode, x);
13705 /* In case the insn wants input operands in modes different from
13706 the result, abort. */
13707 if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
13708 || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
13711 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13712 op0 = copy_to_mode_reg (mode0, op0);
13713 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13714 op1 = copy_to_mode_reg (mode1, op1);
13716 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13717 yet one of the two must not be a memory. This is normally enforced
13718 by expanders, but we didn't bother to create one here. */
13719 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13720 op0 = copy_to_mode_reg (mode0, op0);
13722 pat = GEN_FCN (icode) (target, op0, op1);
13729 /* Subroutine of ix86_expand_builtin to take care of stores. */
13732 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
13735 tree arg0 = TREE_VALUE (arglist);
13736 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13737 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13738 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13739 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13740 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13742 if (VECTOR_MODE_P (mode1))
13743 op1 = safe_vector_operand (op1, mode1);
13745 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13746 op1 = copy_to_mode_reg (mode1, op1);
13748 pat = GEN_FCN (icode) (op0, op1);
13754 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
13757 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
13758 rtx target, int do_load)
13761 tree arg0 = TREE_VALUE (arglist);
13762 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13763 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13764 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13767 || GET_MODE (target) != tmode
13768 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13769 target = gen_reg_rtx (tmode);
13771 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13774 if (VECTOR_MODE_P (mode0))
13775 op0 = safe_vector_operand (op0, mode0);
13777 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13778 op0 = copy_to_mode_reg (mode0, op0);
13781 pat = GEN_FCN (icode) (target, op0);
13788 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13789 sqrtss, rsqrtss, rcpss. */
13792 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
13795 tree arg0 = TREE_VALUE (arglist);
13796 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13797 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13798 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13801 || GET_MODE (target) != tmode
13802 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13803 target = gen_reg_rtx (tmode);
13805 if (VECTOR_MODE_P (mode0))
13806 op0 = safe_vector_operand (op0, mode0);
13808 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13809 op0 = copy_to_mode_reg (mode0, op0);
13812 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13813 op1 = copy_to_mode_reg (mode0, op1);
13815 pat = GEN_FCN (icode) (target, op0, op1);
13822 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13825 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
13829 tree arg0 = TREE_VALUE (arglist);
13830 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13831 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13832 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13834 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13835 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13836 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13837 enum rtx_code comparison = d->comparison;
13839 if (VECTOR_MODE_P (mode0))
13840 op0 = safe_vector_operand (op0, mode0);
13841 if (VECTOR_MODE_P (mode1))
13842 op1 = safe_vector_operand (op1, mode1);
13844 /* Swap operands if we have a comparison that isn't available in
13848 rtx tmp = gen_reg_rtx (mode1);
13849 emit_move_insn (tmp, op1);
13855 || GET_MODE (target) != tmode
13856 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13857 target = gen_reg_rtx (tmode);
13859 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13860 op0 = copy_to_mode_reg (mode0, op0);
13861 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13862 op1 = copy_to_mode_reg (mode1, op1);
13864 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13865 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13872 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
13875 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
13879 tree arg0 = TREE_VALUE (arglist);
13880 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13881 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13882 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13884 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13885 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13886 enum rtx_code comparison = d->comparison;
13888 if (VECTOR_MODE_P (mode0))
13889 op0 = safe_vector_operand (op0, mode0);
13890 if (VECTOR_MODE_P (mode1))
13891 op1 = safe_vector_operand (op1, mode1);
13893 /* Swap operands if we have a comparison that isn't available in
13902 target = gen_reg_rtx (SImode);
13903 emit_move_insn (target, const0_rtx);
13904 target = gen_rtx_SUBREG (QImode, target, 0);
13906 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13907 op0 = copy_to_mode_reg (mode0, op0);
13908 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13909 op1 = copy_to_mode_reg (mode1, op1);
13911 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13912 pat = GEN_FCN (d->icode) (op0, op1);
13916 emit_insn (gen_rtx_SET (VOIDmode,
13917 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13918 gen_rtx_fmt_ee (comparison, QImode,
13922 return SUBREG_REG (target);
13925 /* Expand an expression EXP that calls a built-in function,
13926 with result going to TARGET if that's convenient
13927 (and in mode MODE if that's convenient).
13928 SUBTARGET may be used as the target for computing one of EXP's operands.
13929 IGNORE is nonzero if the value is to be ignored. */
13932 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
13933 enum machine_mode mode ATTRIBUTE_UNUSED,
13934 int ignore ATTRIBUTE_UNUSED)
13936 const struct builtin_description *d;
13938 enum insn_code icode;
13939 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13940 tree arglist = TREE_OPERAND (exp, 1);
13941 tree arg0, arg1, arg2;
13942 rtx op0, op1, op2, pat;
13943 enum machine_mode tmode, mode0, mode1, mode2;
13944 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13948 case IX86_BUILTIN_EMMS:
13949 emit_insn (gen_emms ());
13952 case IX86_BUILTIN_SFENCE:
13953 emit_insn (gen_sfence ());
13956 case IX86_BUILTIN_PEXTRW:
13957 case IX86_BUILTIN_PEXTRW128:
13958 icode = (fcode == IX86_BUILTIN_PEXTRW
13959 ? CODE_FOR_mmx_pextrw
13960 : CODE_FOR_sse2_pextrw);
13961 arg0 = TREE_VALUE (arglist);
13962 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13963 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13964 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13965 tmode = insn_data[icode].operand[0].mode;
13966 mode0 = insn_data[icode].operand[1].mode;
13967 mode1 = insn_data[icode].operand[2].mode;
13969 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13970 op0 = copy_to_mode_reg (mode0, op0);
13971 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13973 error ("selector must be an integer constant in the range 0..%i",
13974 fcode == IX86_BUILTIN_PEXTRW ? 3:7);
13975 return gen_reg_rtx (tmode);
13978 || GET_MODE (target) != tmode
13979 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13980 target = gen_reg_rtx (tmode);
13981 pat = GEN_FCN (icode) (target, op0, op1);
13987 case IX86_BUILTIN_PINSRW:
13988 case IX86_BUILTIN_PINSRW128:
13989 icode = (fcode == IX86_BUILTIN_PINSRW
13990 ? CODE_FOR_mmx_pinsrw
13991 : CODE_FOR_sse2_pinsrw);
13992 arg0 = TREE_VALUE (arglist);
13993 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13994 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13995 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13996 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13997 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13998 tmode = insn_data[icode].operand[0].mode;
13999 mode0 = insn_data[icode].operand[1].mode;
14000 mode1 = insn_data[icode].operand[2].mode;
14001 mode2 = insn_data[icode].operand[3].mode;
14003 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14004 op0 = copy_to_mode_reg (mode0, op0);
14005 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14006 op1 = copy_to_mode_reg (mode1, op1);
14007 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14009 error ("selector must be an integer constant in the range 0..%i",
14010 fcode == IX86_BUILTIN_PINSRW ? 15:255);
14014 || GET_MODE (target) != tmode
14015 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14016 target = gen_reg_rtx (tmode);
14017 pat = GEN_FCN (icode) (target, op0, op1, op2);
14023 case IX86_BUILTIN_MASKMOVQ:
14024 case IX86_BUILTIN_MASKMOVDQU:
14025 icode = (fcode == IX86_BUILTIN_MASKMOVQ
14026 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
14027 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
14028 : CODE_FOR_sse2_maskmovdqu));
14029 /* Note the arg order is different from the operand order. */
14030 arg1 = TREE_VALUE (arglist);
14031 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
14032 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14033 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14034 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14035 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14036 mode0 = insn_data[icode].operand[0].mode;
14037 mode1 = insn_data[icode].operand[1].mode;
14038 mode2 = insn_data[icode].operand[2].mode;
14040 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14041 op0 = copy_to_mode_reg (mode0, op0);
14042 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14043 op1 = copy_to_mode_reg (mode1, op1);
14044 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
14045 op2 = copy_to_mode_reg (mode2, op2);
14046 pat = GEN_FCN (icode) (op0, op1, op2);
14052 case IX86_BUILTIN_SQRTSS:
14053 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
14054 case IX86_BUILTIN_RSQRTSS:
14055 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
14056 case IX86_BUILTIN_RCPSS:
14057 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
14059 case IX86_BUILTIN_LOADAPS:
14060 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
14062 case IX86_BUILTIN_LOADUPS:
14063 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
14065 case IX86_BUILTIN_STOREAPS:
14066 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
14068 case IX86_BUILTIN_STOREUPS:
14069 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
14071 case IX86_BUILTIN_LOADSS:
14072 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
14074 case IX86_BUILTIN_STORESS:
14075 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
14077 case IX86_BUILTIN_LOADHPS:
14078 case IX86_BUILTIN_LOADLPS:
14079 case IX86_BUILTIN_LOADHPD:
14080 case IX86_BUILTIN_LOADLPD:
14081 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
14082 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
14083 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
14084 : CODE_FOR_sse2_movlpd);
14085 arg0 = TREE_VALUE (arglist);
14086 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14087 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14088 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14089 tmode = insn_data[icode].operand[0].mode;
14090 mode0 = insn_data[icode].operand[1].mode;
14091 mode1 = insn_data[icode].operand[2].mode;
14093 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14094 op0 = copy_to_mode_reg (mode0, op0);
14095 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
14097 || GET_MODE (target) != tmode
14098 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14099 target = gen_reg_rtx (tmode);
14100 pat = GEN_FCN (icode) (target, op0, op1);
14106 case IX86_BUILTIN_STOREHPS:
14107 case IX86_BUILTIN_STORELPS:
14108 case IX86_BUILTIN_STOREHPD:
14109 case IX86_BUILTIN_STORELPD:
14110 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
14111 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
14112 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
14113 : CODE_FOR_sse2_movlpd);
14114 arg0 = TREE_VALUE (arglist);
14115 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14116 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14117 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14118 mode0 = insn_data[icode].operand[1].mode;
14119 mode1 = insn_data[icode].operand[2].mode;
14121 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14122 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14123 op1 = copy_to_mode_reg (mode1, op1);
14125 pat = GEN_FCN (icode) (op0, op0, op1);
14131 case IX86_BUILTIN_MOVNTPS:
14132 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
14133 case IX86_BUILTIN_MOVNTQ:
14134 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
14136 case IX86_BUILTIN_LDMXCSR:
14137 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14138 target = assign_386_stack_local (SImode, 0);
14139 emit_move_insn (target, op0);
14140 emit_insn (gen_ldmxcsr (target));
14143 case IX86_BUILTIN_STMXCSR:
14144 target = assign_386_stack_local (SImode, 0);
14145 emit_insn (gen_stmxcsr (target));
14146 return copy_to_mode_reg (SImode, target);
14148 case IX86_BUILTIN_SHUFPS:
14149 case IX86_BUILTIN_SHUFPD:
14150 icode = (fcode == IX86_BUILTIN_SHUFPS
14151 ? CODE_FOR_sse_shufps
14152 : CODE_FOR_sse2_shufpd);
14153 arg0 = TREE_VALUE (arglist);
14154 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14155 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14156 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14157 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14158 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14159 tmode = insn_data[icode].operand[0].mode;
14160 mode0 = insn_data[icode].operand[1].mode;
14161 mode1 = insn_data[icode].operand[2].mode;
14162 mode2 = insn_data[icode].operand[3].mode;
14164 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14165 op0 = copy_to_mode_reg (mode0, op0);
14166 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14167 op1 = copy_to_mode_reg (mode1, op1);
14168 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14170 /* @@@ better error message */
14171 error ("mask must be an immediate");
14172 return gen_reg_rtx (tmode);
14175 || GET_MODE (target) != tmode
14176 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14177 target = gen_reg_rtx (tmode);
14178 pat = GEN_FCN (icode) (target, op0, op1, op2);
14184 case IX86_BUILTIN_PSHUFW:
14185 case IX86_BUILTIN_PSHUFD:
14186 case IX86_BUILTIN_PSHUFHW:
14187 case IX86_BUILTIN_PSHUFLW:
14188 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14189 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14190 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14191 : CODE_FOR_mmx_pshufw);
14192 arg0 = TREE_VALUE (arglist);
14193 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14194 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14195 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14196 tmode = insn_data[icode].operand[0].mode;
14197 mode1 = insn_data[icode].operand[1].mode;
14198 mode2 = insn_data[icode].operand[2].mode;
14200 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14201 op0 = copy_to_mode_reg (mode1, op0);
14202 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14204 /* @@@ better error message */
14205 error ("mask must be an immediate");
14209 || GET_MODE (target) != tmode
14210 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14211 target = gen_reg_rtx (tmode);
14212 pat = GEN_FCN (icode) (target, op0, op1);
14218 case IX86_BUILTIN_PSLLDQI128:
14219 case IX86_BUILTIN_PSRLDQI128:
14220 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14221 : CODE_FOR_sse2_lshrti3);
14222 arg0 = TREE_VALUE (arglist);
14223 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14224 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14225 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14226 tmode = insn_data[icode].operand[0].mode;
14227 mode1 = insn_data[icode].operand[1].mode;
14228 mode2 = insn_data[icode].operand[2].mode;
14230 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14232 op0 = copy_to_reg (op0);
14233 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14235 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14237 error ("shift must be an immediate");
14240 target = gen_reg_rtx (V2DImode);
14241 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14247 case IX86_BUILTIN_FEMMS:
14248 emit_insn (gen_femms ());
14251 case IX86_BUILTIN_PAVGUSB:
14252 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
14254 case IX86_BUILTIN_PF2ID:
14255 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
14257 case IX86_BUILTIN_PFACC:
14258 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
14260 case IX86_BUILTIN_PFADD:
14261 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
14263 case IX86_BUILTIN_PFCMPEQ:
14264 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
14266 case IX86_BUILTIN_PFCMPGE:
14267 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
14269 case IX86_BUILTIN_PFCMPGT:
14270 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
14272 case IX86_BUILTIN_PFMAX:
14273 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
14275 case IX86_BUILTIN_PFMIN:
14276 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
14278 case IX86_BUILTIN_PFMUL:
14279 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
14281 case IX86_BUILTIN_PFRCP:
14282 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
14284 case IX86_BUILTIN_PFRCPIT1:
14285 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
14287 case IX86_BUILTIN_PFRCPIT2:
14288 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
14290 case IX86_BUILTIN_PFRSQIT1:
14291 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
14293 case IX86_BUILTIN_PFRSQRT:
14294 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
14296 case IX86_BUILTIN_PFSUB:
14297 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
14299 case IX86_BUILTIN_PFSUBR:
14300 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
14302 case IX86_BUILTIN_PI2FD:
14303 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
14305 case IX86_BUILTIN_PMULHRW:
14306 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
14308 case IX86_BUILTIN_PF2IW:
14309 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
14311 case IX86_BUILTIN_PFNACC:
14312 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
14314 case IX86_BUILTIN_PFPNACC:
14315 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
14317 case IX86_BUILTIN_PI2FW:
14318 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
14320 case IX86_BUILTIN_PSWAPDSI:
14321 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
14323 case IX86_BUILTIN_PSWAPDSF:
14324 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
14326 case IX86_BUILTIN_SSE_ZERO:
14327 target = gen_reg_rtx (V4SFmode);
14328 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
14331 case IX86_BUILTIN_MMX_ZERO:
14332 target = gen_reg_rtx (DImode);
14333 emit_insn (gen_mmx_clrdi (target));
14336 case IX86_BUILTIN_CLRTI:
14337 target = gen_reg_rtx (V2DImode);
14338 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
14342 case IX86_BUILTIN_SQRTSD:
14343 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14344 case IX86_BUILTIN_LOADAPD:
14345 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14346 case IX86_BUILTIN_LOADUPD:
14347 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14349 case IX86_BUILTIN_STOREAPD:
14350 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14351 case IX86_BUILTIN_STOREUPD:
14352 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14354 case IX86_BUILTIN_LOADSD:
14355 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14357 case IX86_BUILTIN_STORESD:
14358 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
14360 case IX86_BUILTIN_SETPD1:
14361 target = assign_386_stack_local (DFmode, 0);
14362 arg0 = TREE_VALUE (arglist);
14363 emit_move_insn (adjust_address (target, DFmode, 0),
14364 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14365 op0 = gen_reg_rtx (V2DFmode);
14366 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
14367 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
14370 case IX86_BUILTIN_SETPD:
14371 target = assign_386_stack_local (V2DFmode, 0);
14372 arg0 = TREE_VALUE (arglist);
14373 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14374 emit_move_insn (adjust_address (target, DFmode, 0),
14375 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14376 emit_move_insn (adjust_address (target, DFmode, 8),
14377 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14378 op0 = gen_reg_rtx (V2DFmode);
14379 emit_insn (gen_sse2_movapd (op0, target));
14382 case IX86_BUILTIN_LOADRPD:
14383 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14384 gen_reg_rtx (V2DFmode), 1);
14385 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
14388 case IX86_BUILTIN_LOADPD1:
14389 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14390 gen_reg_rtx (V2DFmode), 1);
14391 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14394 case IX86_BUILTIN_STOREPD1:
14395 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14396 case IX86_BUILTIN_STORERPD:
14397 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14399 case IX86_BUILTIN_CLRPD:
14400 target = gen_reg_rtx (V2DFmode);
14401 emit_insn (gen_sse_clrv2df (target));
14404 case IX86_BUILTIN_MFENCE:
14405 emit_insn (gen_sse2_mfence ());
14407 case IX86_BUILTIN_LFENCE:
14408 emit_insn (gen_sse2_lfence ());
14411 case IX86_BUILTIN_CLFLUSH:
14412 arg0 = TREE_VALUE (arglist);
14413 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14414 icode = CODE_FOR_sse2_clflush;
14415 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14416 op0 = copy_to_mode_reg (Pmode, op0);
14418 emit_insn (gen_sse2_clflush (op0));
14421 case IX86_BUILTIN_MOVNTPD:
14422 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14423 case IX86_BUILTIN_MOVNTDQ:
14424 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14425 case IX86_BUILTIN_MOVNTI:
14426 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14428 case IX86_BUILTIN_LOADDQA:
14429 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14430 case IX86_BUILTIN_LOADDQU:
14431 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14432 case IX86_BUILTIN_LOADD:
14433 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14435 case IX86_BUILTIN_STOREDQA:
14436 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14437 case IX86_BUILTIN_STOREDQU:
14438 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14439 case IX86_BUILTIN_STORED:
14440 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
14442 case IX86_BUILTIN_MONITOR:
14443 arg0 = TREE_VALUE (arglist);
14444 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14445 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14446 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14447 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14448 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14450 op0 = copy_to_mode_reg (SImode, op0);
14452 op1 = copy_to_mode_reg (SImode, op1);
14454 op2 = copy_to_mode_reg (SImode, op2);
14455 emit_insn (gen_monitor (op0, op1, op2));
14458 case IX86_BUILTIN_MWAIT:
14459 arg0 = TREE_VALUE (arglist);
14460 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14461 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14462 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14464 op0 = copy_to_mode_reg (SImode, op0);
14466 op1 = copy_to_mode_reg (SImode, op1);
14467 emit_insn (gen_mwait (op0, op1));
14470 case IX86_BUILTIN_LOADDDUP:
14471 return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
14473 case IX86_BUILTIN_LDDQU:
14474 return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
14481 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14482 if (d->code == fcode)
14484 /* Compares are treated specially. */
14485 if (d->icode == CODE_FOR_maskcmpv4sf3
14486 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14487 || d->icode == CODE_FOR_maskncmpv4sf3
14488 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14489 || d->icode == CODE_FOR_maskcmpv2df3
14490 || d->icode == CODE_FOR_vmmaskcmpv2df3
14491 || d->icode == CODE_FOR_maskncmpv2df3
14492 || d->icode == CODE_FOR_vmmaskncmpv2df3)
14493 return ix86_expand_sse_compare (d, arglist, target);
14495 return ix86_expand_binop_builtin (d->icode, arglist, target);
14498 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14499 if (d->code == fcode)
14500 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14502 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14503 if (d->code == fcode)
14504 return ix86_expand_sse_comi (d, arglist, target);
14506 /* @@@ Should really do something sensible here. */
14510 /* Store OPERAND to the memory after reload is completed. This means
14511 that we can't easily use assign_stack_local. */
14513 ix86_force_to_memory (enum machine_mode mode, rtx operand)
14516 if (!reload_completed)
14518 if (TARGET_RED_ZONE)
14520 result = gen_rtx_MEM (mode,
14521 gen_rtx_PLUS (Pmode,
14523 GEN_INT (-RED_ZONE_SIZE)));
14524 emit_move_insn (result, operand);
14526 else if (!TARGET_RED_ZONE && TARGET_64BIT)
14532 operand = gen_lowpart (DImode, operand);
14536 gen_rtx_SET (VOIDmode,
14537 gen_rtx_MEM (DImode,
14538 gen_rtx_PRE_DEC (DImode,
14539 stack_pointer_rtx)),
14545 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14554 split_di (&operand, 1, operands, operands + 1);
14556 gen_rtx_SET (VOIDmode,
14557 gen_rtx_MEM (SImode,
14558 gen_rtx_PRE_DEC (Pmode,
14559 stack_pointer_rtx)),
14562 gen_rtx_SET (VOIDmode,
14563 gen_rtx_MEM (SImode,
14564 gen_rtx_PRE_DEC (Pmode,
14565 stack_pointer_rtx)),
14570 /* It is better to store HImodes as SImodes. */
14571 if (!TARGET_PARTIAL_REG_STALL)
14572 operand = gen_lowpart (SImode, operand);
14576 gen_rtx_SET (VOIDmode,
14577 gen_rtx_MEM (GET_MODE (operand),
14578 gen_rtx_PRE_DEC (SImode,
14579 stack_pointer_rtx)),
14585 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14590 /* Free operand from the memory. */
14592 ix86_free_from_memory (enum machine_mode mode)
14594 if (!TARGET_RED_ZONE)
14598 if (mode == DImode || TARGET_64BIT)
14600 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14604 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14605 to pop or add instruction if registers are available. */
14606 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14607 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14612 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14613 QImode must go into class Q_REGS.
14614 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
14615 movdf to do mem-to-mem moves through integer regs. */
14617 ix86_preferred_reload_class (rtx x, enum reg_class class)
14619 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14621 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14623 /* SSE can't load any constant directly yet. */
14624 if (SSE_CLASS_P (class))
14626 /* Floats can load 0 and 1. */
14627 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14629 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14630 if (MAYBE_SSE_CLASS_P (class))
14631 return (reg_class_subset_p (class, GENERAL_REGS)
14632 ? GENERAL_REGS : FLOAT_REGS);
14636 /* General regs can load everything. */
14637 if (reg_class_subset_p (class, GENERAL_REGS))
14638 return GENERAL_REGS;
14639 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14640 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14643 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14645 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14650 /* If we are copying between general and FP registers, we need a memory
14651 location. The same is true for SSE and MMX registers.
14653 The macro can't work reliably when one of the CLASSES is class containing
14654 registers from multiple units (SSE, MMX, integer). We avoid this by never
14655 combining those units in single alternative in the machine description.
14656 Ensure that this constraint holds to avoid unexpected surprises.
14658 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14659 enforce these sanity checks. */
14661 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
14662 enum machine_mode mode, int strict)
14664 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14665 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14666 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14667 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14668 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14669 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14676 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14677 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14678 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14679 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14680 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
14682 /* Return the cost of moving data from a register in class CLASS1 to
14683 one in class CLASS2.
14685 It is not required that the cost always equal 2 when FROM is the same as TO;
14686 on some machines it is expensive to move between registers if they are not
14687 general registers. */
14689 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
14690 enum reg_class class2)
14692 /* In case we require secondary memory, compute cost of the store followed
14693 by load. In order to avoid bad register allocation choices, we need
14694 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14696 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14700 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14701 MEMORY_MOVE_COST (mode, class1, 1));
14702 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14703 MEMORY_MOVE_COST (mode, class2, 1));
14705 /* In case of copying from general_purpose_register we may emit multiple
14706 stores followed by single load causing memory size mismatch stall.
14707 Count this as arbitrarily high cost of 20. */
14708 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14711 /* In the case of FP/MMX moves, the registers actually overlap, and we
14712 have to switch modes in order to treat them differently. */
14713 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14714 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14720 /* Moves between SSE/MMX and integer unit are expensive. */
14721 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14722 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14723 return ix86_cost->mmxsse_to_integer;
14724 if (MAYBE_FLOAT_CLASS_P (class1))
14725 return ix86_cost->fp_move;
14726 if (MAYBE_SSE_CLASS_P (class1))
14727 return ix86_cost->sse_move;
14728 if (MAYBE_MMX_CLASS_P (class1))
14729 return ix86_cost->mmx_move;
14733 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14735 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
14737 /* Flags and only flags can only hold CCmode values. */
14738 if (CC_REGNO_P (regno))
14739 return GET_MODE_CLASS (mode) == MODE_CC;
14740 if (GET_MODE_CLASS (mode) == MODE_CC
14741 || GET_MODE_CLASS (mode) == MODE_RANDOM
14742 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14744 if (FP_REGNO_P (regno))
14745 return VALID_FP_MODE_P (mode);
14746 if (SSE_REGNO_P (regno))
14747 return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
14748 if (MMX_REGNO_P (regno))
14750 ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
14751 /* We handle both integer and floats in the general purpose registers.
14752 In future we should be able to handle vector modes as well. */
14753 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14755 /* Take care for QImode values - they can be in non-QI regs, but then
14756 they do cause partial register stalls. */
14757 if (regno < 4 || mode != QImode || TARGET_64BIT)
14759 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14762 /* Return the cost of moving data of mode M between a
14763 register and memory. A value of 2 is the default; this cost is
14764 relative to those in `REGISTER_MOVE_COST'.
14766 If moving between registers and memory is more expensive than
14767 between two registers, you should define this macro to express the
14770 Model also increased moving costs of QImode registers in non
14774 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
14776 if (FLOAT_CLASS_P (class))
14793 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14795 if (SSE_CLASS_P (class))
14798 switch (GET_MODE_SIZE (mode))
14812 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14814 if (MMX_CLASS_P (class))
14817 switch (GET_MODE_SIZE (mode))
14828 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14830 switch (GET_MODE_SIZE (mode))
14834 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14835 : ix86_cost->movzbl_load);
14837 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14838 : ix86_cost->int_store[0] + 4);
14841 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14843 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14844 if (mode == TFmode)
14846 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14847 * (((int) GET_MODE_SIZE (mode)
14848 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
14852 /* Compute a (partial) cost for rtx X. Return true if the complete
14853 cost has been computed, and false if subexpressions should be
14854 scanned. In either case, *TOTAL contains the cost result. */
14857 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
14859 enum machine_mode mode = GET_MODE (x);
14867 if (TARGET_64BIT && !x86_64_sign_extended_value (x))
14869 else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
14871 else if (flag_pic && SYMBOLIC_CONST (x)
14873 || (!GET_CODE (x) != LABEL_REF
14874 && (GET_CODE (x) != SYMBOL_REF
14875 || !SYMBOL_REF_LOCAL_P (x)))))
14882 if (mode == VOIDmode)
14885 switch (standard_80387_constant_p (x))
14890 default: /* Other constants */
14895 /* Start with (MEM (SYMBOL_REF)), since that's where
14896 it'll probably end up. Add a penalty for size. */
14897 *total = (COSTS_N_INSNS (1)
14898 + (flag_pic != 0 && !TARGET_64BIT)
14899 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
14905 /* The zero extensions is often completely free on x86_64, so make
14906 it as cheap as possible. */
14907 if (TARGET_64BIT && mode == DImode
14908 && GET_MODE (XEXP (x, 0)) == SImode)
14910 else if (TARGET_ZERO_EXTEND_WITH_AND)
14911 *total = COSTS_N_INSNS (ix86_cost->add);
14913 *total = COSTS_N_INSNS (ix86_cost->movzx);
14917 *total = COSTS_N_INSNS (ix86_cost->movsx);
14921 if (GET_CODE (XEXP (x, 1)) == CONST_INT
14922 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
14924 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14927 *total = COSTS_N_INSNS (ix86_cost->add);
14930 if ((value == 2 || value == 3)
14931 && !TARGET_DECOMPOSE_LEA
14932 && ix86_cost->lea <= ix86_cost->shift_const)
14934 *total = COSTS_N_INSNS (ix86_cost->lea);
14944 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
14946 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14948 if (INTVAL (XEXP (x, 1)) > 32)
14949 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
14951 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
14955 if (GET_CODE (XEXP (x, 1)) == AND)
14956 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
14958 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
14963 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14964 *total = COSTS_N_INSNS (ix86_cost->shift_const);
14966 *total = COSTS_N_INSNS (ix86_cost->shift_var);
14971 if (FLOAT_MODE_P (mode))
14972 *total = COSTS_N_INSNS (ix86_cost->fmul);
14973 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14975 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14978 for (nbits = 0; value != 0; value >>= 1)
14981 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
14982 + nbits * ix86_cost->mult_bit);
14986 /* This is arbitrary */
14987 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
14988 + 7 * ix86_cost->mult_bit);
14996 if (FLOAT_MODE_P (mode))
14997 *total = COSTS_N_INSNS (ix86_cost->fdiv);
14999 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
15003 if (FLOAT_MODE_P (mode))
15004 *total = COSTS_N_INSNS (ix86_cost->fadd);
15005 else if (!TARGET_DECOMPOSE_LEA
15006 && GET_MODE_CLASS (mode) == MODE_INT
15007 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
15009 if (GET_CODE (XEXP (x, 0)) == PLUS
15010 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15011 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
15012 && CONSTANT_P (XEXP (x, 1)))
15014 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
15015 if (val == 2 || val == 4 || val == 8)
15017 *total = COSTS_N_INSNS (ix86_cost->lea);
15018 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15019 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
15021 *total += rtx_cost (XEXP (x, 1), outer_code);
15025 else if (GET_CODE (XEXP (x, 0)) == MULT
15026 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
15028 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
15029 if (val == 2 || val == 4 || val == 8)
15031 *total = COSTS_N_INSNS (ix86_cost->lea);
15032 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15033 *total += rtx_cost (XEXP (x, 1), outer_code);
15037 else if (GET_CODE (XEXP (x, 0)) == PLUS)
15039 *total = COSTS_N_INSNS (ix86_cost->lea);
15040 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15041 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15042 *total += rtx_cost (XEXP (x, 1), outer_code);
15049 if (FLOAT_MODE_P (mode))
15051 *total = COSTS_N_INSNS (ix86_cost->fadd);
15059 if (!TARGET_64BIT && mode == DImode)
15061 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
15062 + (rtx_cost (XEXP (x, 0), outer_code)
15063 << (GET_MODE (XEXP (x, 0)) != DImode))
15064 + (rtx_cost (XEXP (x, 1), outer_code)
15065 << (GET_MODE (XEXP (x, 1)) != DImode)));
15071 if (FLOAT_MODE_P (mode))
15073 *total = COSTS_N_INSNS (ix86_cost->fchs);
15079 if (!TARGET_64BIT && mode == DImode)
15080 *total = COSTS_N_INSNS (ix86_cost->add * 2);
15082 *total = COSTS_N_INSNS (ix86_cost->add);
15086 if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
15091 if (FLOAT_MODE_P (mode))
15092 *total = COSTS_N_INSNS (ix86_cost->fabs);
15096 if (FLOAT_MODE_P (mode))
15097 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
15101 if (XINT (x, 1) == UNSPEC_TP)
15110 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
15112 ix86_svr3_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
15115 fputs ("\tpushl $", asm_out_file);
15116 assemble_name (asm_out_file, XSTR (symbol, 0));
15117 fputc ('\n', asm_out_file);
15123 static int current_machopic_label_num;
15125 /* Given a symbol name and its associated stub, write out the
15126 definition of the stub. */
15129 machopic_output_stub (FILE *file, const char *symb, const char *stub)
15131 unsigned int length;
15132 char *binder_name, *symbol_name, lazy_ptr_name[32];
15133 int label = ++current_machopic_label_num;
15135 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
15136 symb = (*targetm.strip_name_encoding) (symb);
15138 length = strlen (stub);
15139 binder_name = alloca (length + 32);
15140 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
15142 length = strlen (symb);
15143 symbol_name = alloca (length + 32);
15144 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
15146 sprintf (lazy_ptr_name, "L%d$lz", label);
15149 machopic_picsymbol_stub_section ();
15151 machopic_symbol_stub_section ();
15153 fprintf (file, "%s:\n", stub);
15154 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15158 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
15159 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
15160 fprintf (file, "\tjmp %%edx\n");
15163 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
15165 fprintf (file, "%s:\n", binder_name);
15169 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
15170 fprintf (file, "\tpushl %%eax\n");
15173 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
15175 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
15177 machopic_lazy_symbol_ptr_section ();
15178 fprintf (file, "%s:\n", lazy_ptr_name);
15179 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15180 fprintf (file, "\t.long %s\n", binder_name);
15182 #endif /* TARGET_MACHO */
15184 /* Order the registers for register allocator. */
15187 x86_order_regs_for_local_alloc (void)
15192 /* First allocate the local general purpose registers. */
15193 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15194 if (GENERAL_REGNO_P (i) && call_used_regs[i])
15195 reg_alloc_order [pos++] = i;
15197 /* Global general purpose registers. */
15198 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15199 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
15200 reg_alloc_order [pos++] = i;
15202 /* x87 registers come first in case we are doing FP math
15204 if (!TARGET_SSE_MATH)
15205 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15206 reg_alloc_order [pos++] = i;
15208 /* SSE registers. */
15209 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
15210 reg_alloc_order [pos++] = i;
15211 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
15212 reg_alloc_order [pos++] = i;
15214 /* x87 registers. */
15215 if (TARGET_SSE_MATH)
15216 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15217 reg_alloc_order [pos++] = i;
15219 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
15220 reg_alloc_order [pos++] = i;
15222 /* Initialize the rest of array as we do not allocate some registers
15224 while (pos < FIRST_PSEUDO_REGISTER)
15225 reg_alloc_order [pos++] = 0;
15228 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15229 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
15232 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15233 struct attribute_spec.handler. */
15235 ix86_handle_struct_attribute (tree *node, tree name,
15236 tree args ATTRIBUTE_UNUSED,
15237 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
15240 if (DECL_P (*node))
15242 if (TREE_CODE (*node) == TYPE_DECL)
15243 type = &TREE_TYPE (*node);
15248 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15249 || TREE_CODE (*type) == UNION_TYPE)))
15251 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
15252 *no_add_attrs = true;
15255 else if ((is_attribute_p ("ms_struct", name)
15256 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15257 || ((is_attribute_p ("gcc_struct", name)
15258 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15260 warning ("`%s' incompatible attribute ignored",
15261 IDENTIFIER_POINTER (name));
15262 *no_add_attrs = true;
15269 ix86_ms_bitfield_layout_p (tree record_type)
15271 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
15272 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
15273 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
15276 /* Returns an expression indicating where the this parameter is
15277 located on entry to the FUNCTION. */
15280 x86_this_parameter (tree function)
15282 tree type = TREE_TYPE (function);
15286 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
15287 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
15290 if (ix86_function_regparm (type, function) > 0)
15294 parm = TYPE_ARG_TYPES (type);
15295 /* Figure out whether or not the function has a variable number of
15297 for (; parm; parm = TREE_CHAIN (parm))
15298 if (TREE_VALUE (parm) == void_type_node)
15300 /* If not, the this parameter is in the first argument. */
15304 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
15306 return gen_rtx_REG (SImode, regno);
15310 if (aggregate_value_p (TREE_TYPE (type), type))
15311 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
15313 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
15316 /* Determine whether x86_output_mi_thunk can succeed. */
15319 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
15320 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
15321 HOST_WIDE_INT vcall_offset, tree function)
15323 /* 64-bit can handle anything. */
15327 /* For 32-bit, everything's fine if we have one free register. */
15328 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
15331 /* Need a free register for vcall_offset. */
15335 /* Need a free register for GOT references. */
15336 if (flag_pic && !(*targetm.binds_local_p) (function))
15339 /* Otherwise ok. */
15343 /* Output the assembler code for a thunk function. THUNK_DECL is the
15344 declaration for the thunk function itself, FUNCTION is the decl for
15345 the target function. DELTA is an immediate constant offset to be
15346 added to THIS. If VCALL_OFFSET is nonzero, the word at
15347 *(*this + vcall_offset) should be added to THIS. */
15350 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
15351 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
15352 HOST_WIDE_INT vcall_offset, tree function)
15355 rtx this = x86_this_parameter (function);
15358 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15359 pull it in now and let DELTA benefit. */
15362 else if (vcall_offset)
15364 /* Put the this parameter into %eax. */
15366 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15367 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15370 this_reg = NULL_RTX;
15372 /* Adjust the this parameter by a fixed constant. */
15375 xops[0] = GEN_INT (delta);
15376 xops[1] = this_reg ? this_reg : this;
15379 if (!x86_64_general_operand (xops[0], DImode))
15381 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15383 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15387 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15390 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15393 /* Adjust the this parameter by a value stored in the vtable. */
15397 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15400 int tmp_regno = 2 /* ECX */;
15401 if (lookup_attribute ("fastcall",
15402 TYPE_ATTRIBUTES (TREE_TYPE (function))))
15403 tmp_regno = 0 /* EAX */;
15404 tmp = gen_rtx_REG (SImode, tmp_regno);
15407 xops[0] = gen_rtx_MEM (Pmode, this_reg);
15410 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15412 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15414 /* Adjust the this parameter. */
15415 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
15416 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15418 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15419 xops[0] = GEN_INT (vcall_offset);
15421 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15422 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
15424 xops[1] = this_reg;
15426 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15428 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15431 /* If necessary, drop THIS back to its stack slot. */
15432 if (this_reg && this_reg != this)
15434 xops[0] = this_reg;
15436 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15439 xops[0] = XEXP (DECL_RTL (function), 0);
15442 if (!flag_pic || (*targetm.binds_local_p) (function))
15443 output_asm_insn ("jmp\t%P0", xops);
15446 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
15447 tmp = gen_rtx_CONST (Pmode, tmp);
15448 tmp = gen_rtx_MEM (QImode, tmp);
15450 output_asm_insn ("jmp\t%A0", xops);
15455 if (!flag_pic || (*targetm.binds_local_p) (function))
15456 output_asm_insn ("jmp\t%P0", xops);
15461 const char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
15462 tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
15463 tmp = gen_rtx_MEM (QImode, tmp);
15465 output_asm_insn ("jmp\t%0", xops);
15468 #endif /* TARGET_MACHO */
15470 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15471 output_set_got (tmp);
15474 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15475 output_asm_insn ("jmp\t{*}%1", xops);
15481 x86_file_start (void)
15483 default_file_start ();
15484 if (X86_FILE_START_VERSION_DIRECTIVE)
15485 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
15486 if (X86_FILE_START_FLTUSED)
15487 fputs ("\t.global\t__fltused\n", asm_out_file);
15488 if (ix86_asm_dialect == ASM_INTEL)
15489 fputs ("\t.intel_syntax\n", asm_out_file);
15493 x86_field_alignment (tree field, int computed)
15495 enum machine_mode mode;
15496 tree type = TREE_TYPE (field);
15498 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
15500 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15501 ? get_inner_array_type (type) : type);
15502 if (mode == DFmode || mode == DCmode
15503 || GET_MODE_CLASS (mode) == MODE_INT
15504 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
15505 return MIN (32, computed);
15509 /* Output assembler code to FILE to increment profiler label # LABELNO
15510 for profiling a function entry. */
15512 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
15517 #ifndef NO_PROFILE_COUNTERS
15518 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
15520 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
15524 #ifndef NO_PROFILE_COUNTERS
15525 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15527 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15531 #ifndef NO_PROFILE_COUNTERS
15532 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15533 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15535 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
15539 #ifndef NO_PROFILE_COUNTERS
15540 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
15541 PROFILE_COUNT_REGISTER);
15543 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15547 /* We don't have exact information about the insn sizes, but we may assume
15548 quite safely that we are informed about all 1 byte insns and memory
15549 address sizes. This is enough to eliminate unnecessary padding in
15553 min_insn_size (rtx insn)
15557 if (!INSN_P (insn) || !active_insn_p (insn))
15560 /* Discard alignments we've emit and jump instructions. */
15561 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
15562 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
15564 if (GET_CODE (insn) == JUMP_INSN
15565 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
15566 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
15569 /* Important case - calls are always 5 bytes.
15570 It is common to have many calls in the row. */
15571 if (GET_CODE (insn) == CALL_INSN
15572 && symbolic_reference_mentioned_p (PATTERN (insn))
15573 && !SIBLING_CALL_P (insn))
15575 if (get_attr_length (insn) <= 1)
15578 /* For normal instructions we may rely on the sizes of addresses
15579 and the presence of symbol to require 4 bytes of encoding.
15580 This is not the case for jumps where references are PC relative. */
15581 if (GET_CODE (insn) != JUMP_INSN)
15583 l = get_attr_length_address (insn);
15584 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
15593 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
15597 k8_avoid_jump_misspredicts (void)
15599 rtx insn, start = get_insns ();
15600 int nbytes = 0, njumps = 0;
15603 /* Look for all minimal intervals of instructions containing 4 jumps.
15604 The intervals are bounded by START and INSN. NBYTES is the total
15605 size of instructions in the interval including INSN and not including
15606 START. When the NBYTES is smaller than 16 bytes, it is possible
15607 that the end of START and INSN ends up in the same 16byte page.
15609 The smallest offset in the page INSN can start is the case where START
15610 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
15611 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
15613 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
15616 nbytes += min_insn_size (insn);
15618 fprintf(rtl_dump_file, "Insn %i estimated to %i bytes\n",
15619 INSN_UID (insn), min_insn_size (insn));
15620 if ((GET_CODE (insn) == JUMP_INSN
15621 && GET_CODE (PATTERN (insn)) != ADDR_VEC
15622 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
15623 || GET_CODE (insn) == CALL_INSN)
15630 start = NEXT_INSN (start);
15631 if ((GET_CODE (start) == JUMP_INSN
15632 && GET_CODE (PATTERN (start)) != ADDR_VEC
15633 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
15634 || GET_CODE (start) == CALL_INSN)
15635 njumps--, isjump = 1;
15638 nbytes -= min_insn_size (start);
15643 fprintf(rtl_dump_file, "Interval %i to %i has %i bytes\n",
15644 INSN_UID (start), INSN_UID (insn), nbytes);
15646 if (njumps == 3 && isjump && nbytes < 16)
15648 int padsize = 15 - nbytes + min_insn_size (insn);
15651 fprintf (rtl_dump_file, "Padding insn %i by %i bytes!\n", INSN_UID (insn), padsize);
15652 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
15657 /* Implement machine specific optimizations.
15658 At the moment we implement single transformation: AMD Athlon works faster
15659 when RET is not destination of conditional jump or directly preceded
15660 by other jump instruction. We avoid the penalty by inserting NOP just
15661 before the RET instructions in such cases. */
15667 if (!TARGET_ATHLON_K8 || !optimize || optimize_size)
15669 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
15671 basic_block bb = e->src;
15672 rtx ret = BB_END (bb);
15674 bool replace = false;
15676 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
15677 || !maybe_hot_bb_p (bb))
15679 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15680 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
15682 if (prev && GET_CODE (prev) == CODE_LABEL)
15685 for (e = bb->pred; e; e = e->pred_next)
15686 if (EDGE_FREQUENCY (e) && e->src->index >= 0
15687 && !(e->flags & EDGE_FALLTHRU))
15692 prev = prev_active_insn (ret);
15694 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
15695 || GET_CODE (prev) == CALL_INSN))
15697 /* Empty functions get branch mispredict even when the jump destination
15698 is not visible to us. */
15699 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
15704 emit_insn_before (gen_return_internal_long (), ret);
15708 k8_avoid_jump_misspredicts ();
15711 /* Return nonzero when QImode register that must be represented via REX prefix
15714 x86_extended_QIreg_mentioned_p (rtx insn)
15717 extract_insn_cached (insn);
15718 for (i = 0; i < recog_data.n_operands; i++)
15719 if (REG_P (recog_data.operand[i])
15720 && REGNO (recog_data.operand[i]) >= 4)
15725 /* Return nonzero when P points to register encoded via REX prefix.
15726 Called via for_each_rtx. */
15728 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
15730 unsigned int regno;
15733 regno = REGNO (*p);
15734 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15737 /* Return true when INSN mentions register that must be encoded using REX
15740 x86_extended_reg_mentioned_p (rtx insn)
15742 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15745 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
15746 optabs would emit if we didn't have TFmode patterns. */
15749 x86_emit_floatuns (rtx operands[2])
15751 rtx neglab, donelab, i0, i1, f0, in, out;
15752 enum machine_mode mode, inmode;
15754 inmode = GET_MODE (operands[1]);
15755 if (inmode != SImode
15756 && inmode != DImode)
15760 in = force_reg (inmode, operands[1]);
15761 mode = GET_MODE (out);
15762 neglab = gen_label_rtx ();
15763 donelab = gen_label_rtx ();
15764 i1 = gen_reg_rtx (Pmode);
15765 f0 = gen_reg_rtx (mode);
15767 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
15769 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15770 emit_jump_insn (gen_jump (donelab));
15773 emit_label (neglab);
15775 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15776 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15777 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15778 expand_float (f0, i0, 0);
15779 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15781 emit_label (donelab);
15784 /* Return if we do not know how to pass TYPE solely in registers. */
15786 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
15788 if (default_must_pass_in_stack (mode, type))
15790 return (!TARGET_64BIT && type && mode == TImode);
15793 #include "gt-i386.h"