1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-attr.h"
43 #include "basic-block.h"
46 #include "target-def.h"
47 #include "langhooks.h"
/* Default stack-probe limit: -1 means "no limit", i.e. probe the whole
   allocation.  Targets may pre-define this in their tm.h.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.
   Index 4 is the catch-all for any other (wider) mode.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
62 /* Processor costs (relative to an add) */
64 struct processor_costs size_cost = { /* costs for tunning for size */
65 2, /* cost of an add instruction */
66 3, /* cost of a lea instruction */
67 2, /* variable shift costs */
68 3, /* constant shift costs */
69 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
70 0, /* cost of multiply per each bit set */
71 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
72 3, /* cost of movsx */
73 3, /* cost of movzx */
76 2, /* cost for loading QImode using movzbl */
77 {2, 2, 2}, /* cost of loading integer registers
78 in QImode, HImode and SImode.
79 Relative to reg-reg move (2). */
80 {2, 2, 2}, /* cost of storing integer registers */
81 2, /* cost of reg,reg fld/fst */
82 {2, 2, 2}, /* cost of loading fp registers
83 in SFmode, DFmode and XFmode */
84 {2, 2, 2}, /* cost of loading integer registers */
85 3, /* cost of moving MMX register */
86 {3, 3}, /* cost of loading MMX registers
87 in SImode and DImode */
88 {3, 3}, /* cost of storing MMX registers
89 in SImode and DImode */
90 3, /* cost of moving SSE register */
91 {3, 3, 3}, /* cost of loading SSE registers
92 in SImode, DImode and TImode */
93 {3, 3, 3}, /* cost of storing SSE registers
94 in SImode, DImode and TImode */
95 3, /* MMX or SSE register to integer */
96 0, /* size of prefetch block */
97 0, /* number of parallel prefetches */
99 2, /* cost of FADD and FSUB insns. */
100 2, /* cost of FMUL instruction. */
101 2, /* cost of FDIV instruction. */
102 2, /* cost of FABS instruction. */
103 2, /* cost of FCHS instruction. */
104 2, /* cost of FSQRT instruction. */
107 /* Processor costs (relative to an add) */
109 struct processor_costs i386_cost = { /* 386 specific costs */
110 1, /* cost of an add instruction */
111 1, /* cost of a lea instruction */
112 3, /* variable shift costs */
113 2, /* constant shift costs */
114 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
115 1, /* cost of multiply per each bit set */
116 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
117 3, /* cost of movsx */
118 2, /* cost of movzx */
119 15, /* "large" insn */
121 4, /* cost for loading QImode using movzbl */
122 {2, 4, 2}, /* cost of loading integer registers
123 in QImode, HImode and SImode.
124 Relative to reg-reg move (2). */
125 {2, 4, 2}, /* cost of storing integer registers */
126 2, /* cost of reg,reg fld/fst */
127 {8, 8, 8}, /* cost of loading fp registers
128 in SFmode, DFmode and XFmode */
129 {8, 8, 8}, /* cost of loading integer registers */
130 2, /* cost of moving MMX register */
131 {4, 8}, /* cost of loading MMX registers
132 in SImode and DImode */
133 {4, 8}, /* cost of storing MMX registers
134 in SImode and DImode */
135 2, /* cost of moving SSE register */
136 {4, 8, 16}, /* cost of loading SSE registers
137 in SImode, DImode and TImode */
138 {4, 8, 16}, /* cost of storing SSE registers
139 in SImode, DImode and TImode */
140 3, /* MMX or SSE register to integer */
141 0, /* size of prefetch block */
142 0, /* number of parallel prefetches */
144 23, /* cost of FADD and FSUB insns. */
145 27, /* cost of FMUL instruction. */
146 88, /* cost of FDIV instruction. */
147 22, /* cost of FABS instruction. */
148 24, /* cost of FCHS instruction. */
149 122, /* cost of FSQRT instruction. */
153 struct processor_costs i486_cost = { /* 486 specific costs */
154 1, /* cost of an add instruction */
155 1, /* cost of a lea instruction */
156 3, /* variable shift costs */
157 2, /* constant shift costs */
158 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
159 1, /* cost of multiply per each bit set */
160 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
161 3, /* cost of movsx */
162 2, /* cost of movzx */
163 15, /* "large" insn */
165 4, /* cost for loading QImode using movzbl */
166 {2, 4, 2}, /* cost of loading integer registers
167 in QImode, HImode and SImode.
168 Relative to reg-reg move (2). */
169 {2, 4, 2}, /* cost of storing integer registers */
170 2, /* cost of reg,reg fld/fst */
171 {8, 8, 8}, /* cost of loading fp registers
172 in SFmode, DFmode and XFmode */
173 {8, 8, 8}, /* cost of loading integer registers */
174 2, /* cost of moving MMX register */
175 {4, 8}, /* cost of loading MMX registers
176 in SImode and DImode */
177 {4, 8}, /* cost of storing MMX registers
178 in SImode and DImode */
179 2, /* cost of moving SSE register */
180 {4, 8, 16}, /* cost of loading SSE registers
181 in SImode, DImode and TImode */
182 {4, 8, 16}, /* cost of storing SSE registers
183 in SImode, DImode and TImode */
184 3, /* MMX or SSE register to integer */
185 0, /* size of prefetch block */
186 0, /* number of parallel prefetches */
188 8, /* cost of FADD and FSUB insns. */
189 16, /* cost of FMUL instruction. */
190 73, /* cost of FDIV instruction. */
191 3, /* cost of FABS instruction. */
192 3, /* cost of FCHS instruction. */
193 83, /* cost of FSQRT instruction. */
197 struct processor_costs pentium_cost = {
198 1, /* cost of an add instruction */
199 1, /* cost of a lea instruction */
200 4, /* variable shift costs */
201 1, /* constant shift costs */
202 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
203 0, /* cost of multiply per each bit set */
204 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
205 3, /* cost of movsx */
206 2, /* cost of movzx */
207 8, /* "large" insn */
209 6, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {2, 2, 6}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {4, 4, 6}, /* cost of loading integer registers */
218 8, /* cost of moving MMX register */
219 {8, 8}, /* cost of loading MMX registers
220 in SImode and DImode */
221 {8, 8}, /* cost of storing MMX registers
222 in SImode and DImode */
223 2, /* cost of moving SSE register */
224 {4, 8, 16}, /* cost of loading SSE registers
225 in SImode, DImode and TImode */
226 {4, 8, 16}, /* cost of storing SSE registers
227 in SImode, DImode and TImode */
228 3, /* MMX or SSE register to integer */
229 0, /* size of prefetch block */
230 0, /* number of parallel prefetches */
232 3, /* cost of FADD and FSUB insns. */
233 3, /* cost of FMUL instruction. */
234 39, /* cost of FDIV instruction. */
235 1, /* cost of FABS instruction. */
236 1, /* cost of FCHS instruction. */
237 70, /* cost of FSQRT instruction. */
241 struct processor_costs pentiumpro_cost = {
242 1, /* cost of an add instruction */
243 1, /* cost of a lea instruction */
244 1, /* variable shift costs */
245 1, /* constant shift costs */
246 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
247 0, /* cost of multiply per each bit set */
248 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
249 1, /* cost of movsx */
250 1, /* cost of movzx */
251 8, /* "large" insn */
253 2, /* cost for loading QImode using movzbl */
254 {4, 4, 4}, /* cost of loading integer registers
255 in QImode, HImode and SImode.
256 Relative to reg-reg move (2). */
257 {2, 2, 2}, /* cost of storing integer registers */
258 2, /* cost of reg,reg fld/fst */
259 {2, 2, 6}, /* cost of loading fp registers
260 in SFmode, DFmode and XFmode */
261 {4, 4, 6}, /* cost of loading integer registers */
262 2, /* cost of moving MMX register */
263 {2, 2}, /* cost of loading MMX registers
264 in SImode and DImode */
265 {2, 2}, /* cost of storing MMX registers
266 in SImode and DImode */
267 2, /* cost of moving SSE register */
268 {2, 2, 8}, /* cost of loading SSE registers
269 in SImode, DImode and TImode */
270 {2, 2, 8}, /* cost of storing SSE registers
271 in SImode, DImode and TImode */
272 3, /* MMX or SSE register to integer */
273 32, /* size of prefetch block */
274 6, /* number of parallel prefetches */
276 3, /* cost of FADD and FSUB insns. */
277 5, /* cost of FMUL instruction. */
278 56, /* cost of FDIV instruction. */
279 2, /* cost of FABS instruction. */
280 2, /* cost of FCHS instruction. */
281 56, /* cost of FSQRT instruction. */
285 struct processor_costs k6_cost = {
286 1, /* cost of an add instruction */
287 2, /* cost of a lea instruction */
288 1, /* variable shift costs */
289 1, /* constant shift costs */
290 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
291 0, /* cost of multiply per each bit set */
292 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
293 2, /* cost of movsx */
294 2, /* cost of movzx */
295 8, /* "large" insn */
297 3, /* cost for loading QImode using movzbl */
298 {4, 5, 4}, /* cost of loading integer registers
299 in QImode, HImode and SImode.
300 Relative to reg-reg move (2). */
301 {2, 3, 2}, /* cost of storing integer registers */
302 4, /* cost of reg,reg fld/fst */
303 {6, 6, 6}, /* cost of loading fp registers
304 in SFmode, DFmode and XFmode */
305 {4, 4, 4}, /* cost of loading integer registers */
306 2, /* cost of moving MMX register */
307 {2, 2}, /* cost of loading MMX registers
308 in SImode and DImode */
309 {2, 2}, /* cost of storing MMX registers
310 in SImode and DImode */
311 2, /* cost of moving SSE register */
312 {2, 2, 8}, /* cost of loading SSE registers
313 in SImode, DImode and TImode */
314 {2, 2, 8}, /* cost of storing SSE registers
315 in SImode, DImode and TImode */
316 6, /* MMX or SSE register to integer */
317 32, /* size of prefetch block */
318 1, /* number of parallel prefetches */
320 2, /* cost of FADD and FSUB insns. */
321 2, /* cost of FMUL instruction. */
322 56, /* cost of FDIV instruction. */
323 2, /* cost of FABS instruction. */
324 2, /* cost of FCHS instruction. */
325 56, /* cost of FSQRT instruction. */
329 struct processor_costs athlon_cost = {
330 1, /* cost of an add instruction */
331 2, /* cost of a lea instruction */
332 1, /* variable shift costs */
333 1, /* constant shift costs */
334 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
335 0, /* cost of multiply per each bit set */
336 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
337 1, /* cost of movsx */
338 1, /* cost of movzx */
339 8, /* "large" insn */
341 4, /* cost for loading QImode using movzbl */
342 {3, 4, 3}, /* cost of loading integer registers
343 in QImode, HImode and SImode.
344 Relative to reg-reg move (2). */
345 {3, 4, 3}, /* cost of storing integer registers */
346 4, /* cost of reg,reg fld/fst */
347 {4, 4, 12}, /* cost of loading fp registers
348 in SFmode, DFmode and XFmode */
349 {6, 6, 8}, /* cost of loading integer registers */
350 2, /* cost of moving MMX register */
351 {4, 4}, /* cost of loading MMX registers
352 in SImode and DImode */
353 {4, 4}, /* cost of storing MMX registers
354 in SImode and DImode */
355 2, /* cost of moving SSE register */
356 {4, 4, 6}, /* cost of loading SSE registers
357 in SImode, DImode and TImode */
358 {4, 4, 5}, /* cost of storing SSE registers
359 in SImode, DImode and TImode */
360 5, /* MMX or SSE register to integer */
361 64, /* size of prefetch block */
362 6, /* number of parallel prefetches */
364 4, /* cost of FADD and FSUB insns. */
365 4, /* cost of FMUL instruction. */
366 24, /* cost of FDIV instruction. */
367 2, /* cost of FABS instruction. */
368 2, /* cost of FCHS instruction. */
369 35, /* cost of FSQRT instruction. */
373 struct processor_costs k8_cost = {
374 1, /* cost of an add instruction */
375 2, /* cost of a lea instruction */
376 1, /* variable shift costs */
377 1, /* constant shift costs */
378 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
379 0, /* cost of multiply per each bit set */
380 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
381 1, /* cost of movsx */
382 1, /* cost of movzx */
383 8, /* "large" insn */
385 4, /* cost for loading QImode using movzbl */
386 {3, 4, 3}, /* cost of loading integer registers
387 in QImode, HImode and SImode.
388 Relative to reg-reg move (2). */
389 {3, 4, 3}, /* cost of storing integer registers */
390 4, /* cost of reg,reg fld/fst */
391 {4, 4, 12}, /* cost of loading fp registers
392 in SFmode, DFmode and XFmode */
393 {6, 6, 8}, /* cost of loading integer registers */
394 2, /* cost of moving MMX register */
395 {3, 3}, /* cost of loading MMX registers
396 in SImode and DImode */
397 {4, 4}, /* cost of storing MMX registers
398 in SImode and DImode */
399 2, /* cost of moving SSE register */
400 {4, 3, 6}, /* cost of loading SSE registers
401 in SImode, DImode and TImode */
402 {4, 4, 5}, /* cost of storing SSE registers
403 in SImode, DImode and TImode */
404 5, /* MMX or SSE register to integer */
405 64, /* size of prefetch block */
406 6, /* number of parallel prefetches */
408 4, /* cost of FADD and FSUB insns. */
409 4, /* cost of FMUL instruction. */
410 19, /* cost of FDIV instruction. */
411 2, /* cost of FABS instruction. */
412 2, /* cost of FCHS instruction. */
413 35, /* cost of FSQRT instruction. */
417 struct processor_costs pentium4_cost = {
418 1, /* cost of an add instruction */
419 1, /* cost of a lea instruction */
420 4, /* variable shift costs */
421 4, /* constant shift costs */
422 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
423 0, /* cost of multiply per each bit set */
424 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
425 1, /* cost of movsx */
426 1, /* cost of movzx */
427 16, /* "large" insn */
429 2, /* cost for loading QImode using movzbl */
430 {4, 5, 4}, /* cost of loading integer registers
431 in QImode, HImode and SImode.
432 Relative to reg-reg move (2). */
433 {2, 3, 2}, /* cost of storing integer registers */
434 2, /* cost of reg,reg fld/fst */
435 {2, 2, 6}, /* cost of loading fp registers
436 in SFmode, DFmode and XFmode */
437 {4, 4, 6}, /* cost of loading integer registers */
438 2, /* cost of moving MMX register */
439 {2, 2}, /* cost of loading MMX registers
440 in SImode and DImode */
441 {2, 2}, /* cost of storing MMX registers
442 in SImode and DImode */
443 12, /* cost of moving SSE register */
444 {12, 12, 12}, /* cost of loading SSE registers
445 in SImode, DImode and TImode */
446 {2, 2, 8}, /* cost of storing SSE registers
447 in SImode, DImode and TImode */
448 10, /* MMX or SSE register to integer */
449 64, /* size of prefetch block */
450 6, /* number of parallel prefetches */
452 5, /* cost of FADD and FSUB insns. */
453 7, /* cost of FMUL instruction. */
454 43, /* cost of FDIV instruction. */
455 2, /* cost of FABS instruction. */
456 2, /* cost of FCHS instruction. */
457 43, /* cost of FSQRT instruction. */
460 const struct processor_costs *ix86_cost = &pentium_cost;
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
473 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
474 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
475 const int x86_zero_extend_with_and = m_486 | m_PENT;
476 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
477 const int x86_double_with_add = ~m_386;
478 const int x86_use_bit_test = m_386;
479 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
480 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
481 const int x86_3dnow_a = m_ATHLON_K8;
482 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
483 const int x86_branch_hints = m_PENT4;
484 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
485 const int x86_partial_reg_stall = m_PPRO;
486 const int x86_use_loop = m_K6;
487 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
488 const int x86_use_mov0 = m_K6;
489 const int x86_use_cltd = ~(m_PENT | m_K6);
490 const int x86_read_modify_write = ~m_PENT;
491 const int x86_read_modify = ~(m_PENT | m_PPRO);
492 const int x86_split_long_moves = m_PPRO;
493 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
494 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
495 const int x86_single_stringop = m_386 | m_PENT4;
496 const int x86_qimode_math = ~(0);
497 const int x86_promote_qi_regs = 0;
498 const int x86_himode_math = ~(m_PPRO);
499 const int x86_promote_hi_regs = m_PPRO;
500 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
501 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
502 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
503 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
504 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
505 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
506 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
507 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
508 const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
509 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
510 const int x86_decompose_lea = m_PENT4;
511 const int x86_shift1 = ~m_486;
512 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
513 const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
514 /* Set for machines where the type and dependencies are resolved on SSE register
515 parts instead of whole registers, so we may maintain just lower part of
516 scalar values in proper format leaving the upper part undefined. */
517 const int x86_sse_partial_regs = m_ATHLON_K8;
518 /* Athlon optimizes partial-register FPS special case, thus avoiding the
519 need for extra instructions beforehand */
520 const int x86_sse_partial_regs_for_cvtsd2ss = 0;
521 const int x86_sse_typeless_stores = m_ATHLON_K8;
522 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
523 const int x86_use_ffreep = m_ATHLON_K8;
524 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
525 const int x86_inter_unit_moves = ~(m_ATHLON_K8);
526 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_PPRO;
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20
533 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
534 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
535 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
536 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
538 /* Array of the smallest class containing reg number REGNO, indexed by
539 REGNO. Used by REGNO_REG_CLASS in i386.h. */
541 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
544 AREG, DREG, CREG, BREG,
546 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
548 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
549 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
552 /* flags, fpsr, dirflag, frame */
553 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
554 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
556 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
558 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
559 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
560 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
564 /* The "default" register map used in 32bit mode. */
566 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
568 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
569 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
570 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
571 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
572 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
573 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
574 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
577 static int const x86_64_int_parameter_registers[6] =
579 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
580 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
/* Hard register numbers used for integer value return in the x86-64
   ABI.  Note: reg 1 is RDX (see the parameter table above), so the
   original "RDI" annotation on it was wrong.  */
static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};
588 /* The "default" register map used in 64bit mode. */
589 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
591 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
592 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
593 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
594 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
595 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
596 8,9,10,11,12,13,14,15, /* extended integer registers */
597 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
600 /* Define the register numbers to be used in Dwarf debugging information.
601 The SVR4 reference port C compiler uses the following register numbers
602 in its Dwarf output code:
603 0 for %eax (gcc regno = 0)
604 1 for %ecx (gcc regno = 2)
605 2 for %edx (gcc regno = 1)
606 3 for %ebx (gcc regno = 3)
607 4 for %esp (gcc regno = 7)
608 5 for %ebp (gcc regno = 6)
609 6 for %esi (gcc regno = 4)
610 7 for %edi (gcc regno = 5)
611 The following three DWARF register numbers are never generated by
612 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
613 believes these numbers have these meanings.
614 8 for %eip (no gcc equivalent)
615 9 for %eflags (gcc regno = 17)
616 10 for %trapno (no gcc equivalent)
617 It is not at all clear how we should number the FP stack registers
618 for the x86 architecture. If the version of SDB on x86/svr4 were
619 a bit less brain dead with respect to floating-point then we would
620 have a precedent to follow with respect to DWARF register numbers
621 for x86 FP registers, but the SDB on x86/svr4 is so completely
622 broken with respect to FP registers that it is hardly worth thinking
623 of it as something to strive for compatibility with.
624 The version of x86/svr4 SDB I have at the moment does (partially)
625 seem to believe that DWARF register number 11 is associated with
626 the x86 register %st(0), but that's about all. Higher DWARF
627 register numbers don't seem to be associated with anything in
628 particular, and even for DWARF regno 11, SDB only seems to under-
629 stand that it should say that a variable lives in %st(0) (when
630 asked via an `=' command) if we said it was in DWARF regno 11,
631 but SDB still prints garbage when asked for the value of the
632 variable in question (via a `/' command).
633 (Also note that the labels SDB prints for various FP stack regs
634 when doing an `x' command are all wrong.)
635 Note that these problems generally don't affect the native SVR4
636 C compiler because it doesn't allow the use of -O with -g and
637 because when it is *not* optimizing, it allocates a memory
638 location for each floating-point variable, and the memory
639 location is what gets described in the DWARF AT_location
640 attribute for the variable in question.
641 Regardless of the severe mental illness of the x86/svr4 SDB, we
642 do something sensible here and we use the following DWARF
643 register numbers. Note that these are all stack-top-relative
645 11 for %st(0) (gcc regno = 8)
646 12 for %st(1) (gcc regno = 9)
647 13 for %st(2) (gcc regno = 10)
648 14 for %st(3) (gcc regno = 11)
649 15 for %st(4) (gcc regno = 12)
650 16 for %st(5) (gcc regno = 13)
651 17 for %st(6) (gcc regno = 14)
652 18 for %st(7) (gcc regno = 15)
654 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
656 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
657 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
658 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
659 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
660 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
661 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
662 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
665 /* Test and compare insns in i386.md store the information needed to
666 generate branch and scc insns here. */
668 rtx ix86_compare_op0 = NULL_RTX;
669 rtx ix86_compare_op1 = NULL_RTX;
/* Maximum number of scratch stack slots tracked per function.  */
#define MAX_386_STACK_LOCALS 3
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
675 /* Define the structure for the machine field in struct function. */
677 struct stack_local_entry GTY(())
682 struct stack_local_entry *next;
685 /* Structure describing stack frame layout.
686 Stack grows downward:
692 saved frame pointer if frame_pointer_needed
693 <- HARD_FRAME_POINTER
699 > to_allocate <- FRAME_POINTER
711 int outgoing_arguments_size;
714 HOST_WIDE_INT to_allocate;
715 /* The offsets relative to ARG_POINTER. */
716 HOST_WIDE_INT frame_pointer_offset;
717 HOST_WIDE_INT hard_frame_pointer_offset;
718 HOST_WIDE_INT stack_pointer_offset;
720 /* When save_regs_using_mov is set, emit prologue using
721 move instead of push instructions. */
722 bool save_regs_using_mov;
725 /* Used to enable/disable debugging features. */
726 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
727 /* Code model option as passed by user. */
728 const char *ix86_cmodel_string;
730 enum cmodel ix86_cmodel;
732 const char *ix86_asm_string;
733 enum asm_dialect ix86_asm_dialect = ASM_ATT;
735 const char *ix86_tls_dialect_string;
736 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
738 /* Which unit we are generating floating point math for. */
739 enum fpmath_unit ix86_fpmath;
741 /* Which cpu are we scheduling for. */
742 enum processor_type ix86_tune;
743 /* Which instruction set architecture to use. */
744 enum processor_type ix86_arch;
746 /* Strings to hold which cpu and instruction set architecture to use. */
747 const char *ix86_tune_string; /* for -mtune=<xxx> */
748 const char *ix86_arch_string; /* for -march=<xxx> */
749 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
751 /* # of registers to use to pass arguments. */
752 const char *ix86_regparm_string;
754 /* true if sse prefetch instruction is not NOOP. */
755 int x86_prefetch_sse;
757 /* ix86_regparm_string as a number */
760 /* Alignment to use for loops and jumps: */
762 /* Power of two alignment for loops. */
763 const char *ix86_align_loops_string;
765 /* Power of two alignment for non-loop jumps. */
766 const char *ix86_align_jumps_string;
768 /* Power of two alignment for stack boundary in bytes. */
769 const char *ix86_preferred_stack_boundary_string;
771 /* Preferred alignment for stack boundary in bits. */
772 int ix86_preferred_stack_boundary;
774 /* Values 1-5: see jump.c */
775 int ix86_branch_cost;
776 const char *ix86_branch_cost_string;
778 /* Power of two alignment for functions. */
779 const char *ix86_align_funcs_string;
781 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
782 static char internal_label_prefix[16];
783 static int internal_label_prefix_len;
785 static int local_symbolic_operand (rtx, enum machine_mode);
786 static int tls_symbolic_operand_1 (rtx, enum tls_model);
787 static void output_pic_addr_const (FILE *, rtx, int);
788 static void put_condition_code (enum rtx_code, enum machine_mode,
790 static const char *get_some_local_dynamic_name (void);
791 static int get_some_local_dynamic_name_1 (rtx *, void *);
792 static rtx maybe_get_pool_constant (rtx);
793 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
794 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
796 static rtx get_thread_pointer (int);
797 static rtx legitimize_tls_address (rtx, enum tls_model, int);
798 static void get_pc_thunk_name (char [32], unsigned int);
799 static rtx gen_push (rtx);
800 static int memory_address_length (rtx addr);
801 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
802 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
803 static enum attr_ppro_uops ix86_safe_ppro_uops (rtx);
804 static void ix86_dump_ppro_packet (FILE *);
805 static void ix86_reorder_insn (rtx *, rtx *);
806 static struct machine_function * ix86_init_machine_status (void);
807 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
808 static int ix86_nsaved_regs (void);
809 static void ix86_emit_save_regs (void);
810 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
811 static void ix86_emit_restore_regs_using_mov (rtx, int, int);
812 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
813 static void ix86_set_move_mem_attrs_1 (rtx, rtx, rtx, rtx, rtx);
814 static void ix86_sched_reorder_ppro (rtx *, rtx *);
815 static HOST_WIDE_INT ix86_GOT_alias_set (void);
816 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
817 static rtx ix86_expand_aligntest (rtx, int);
818 static void ix86_expand_strlensi_unroll_1 (rtx, rtx);
819 static int ix86_issue_rate (void);
820 static int ix86_adjust_cost (rtx, rtx, rtx, int);
821 static void ix86_sched_init (FILE *, int, int);
822 static int ix86_sched_reorder (FILE *, int, rtx *, int *, int);
823 static int ix86_variable_issue (FILE *, int, rtx, int);
824 static int ia32_use_dfa_pipeline_interface (void);
825 static int ia32_multipass_dfa_lookahead (void);
826 static void ix86_init_mmx_sse_builtins (void);
827 static rtx x86_this_parameter (tree);
828 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
829 HOST_WIDE_INT, tree);
830 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
831 static void x86_file_start (void);
832 static void ix86_reorg (void);
833 bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
837 rtx base, index, disp;
839 enum ix86_address_seg { SEG_DEFAULT, SEG_FS, SEG_GS } seg;
842 static int ix86_decompose_address (rtx, struct ix86_address *);
843 static int ix86_address_cost (rtx);
844 static bool ix86_cannot_force_const_mem (rtx);
845 static rtx ix86_delegitimize_address (rtx);
847 struct builtin_description;
848 static rtx ix86_expand_sse_comi (const struct builtin_description *,
850 static rtx ix86_expand_sse_compare (const struct builtin_description *,
852 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
853 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
854 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
855 static rtx ix86_expand_store_builtin (enum insn_code, tree);
856 static rtx safe_vector_operand (rtx, enum machine_mode);
857 static enum rtx_code ix86_fp_compare_code_to_integer (enum rtx_code);
858 static void ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *,
859 enum rtx_code *, enum rtx_code *);
860 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
861 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
862 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
863 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
864 static int ix86_fp_comparison_cost (enum rtx_code code);
865 static unsigned int ix86_select_alt_pic_regnum (void);
866 static int ix86_save_reg (unsigned int, int);
867 static void ix86_compute_frame_layout (struct ix86_frame *);
868 static int ix86_comp_type_attributes (tree, tree);
869 static int ix86_function_regparm (tree, tree);
870 const struct attribute_spec ix86_attribute_table[];
871 static bool ix86_function_ok_for_sibcall (tree, tree);
872 static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
873 static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
874 static int ix86_value_regno (enum machine_mode);
875 static bool contains_128bit_aligned_vector_p (tree);
876 static bool ix86_ms_bitfield_layout_p (tree);
877 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
878 static int extended_reg_mentioned_1 (rtx *, void *);
879 static bool ix86_rtx_costs (rtx, int, int, int *);
880 static int min_insn_size (rtx);
881 static void k8_avoid_jump_misspredicts (void);
883 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
884 static void ix86_svr3_asm_out_constructor (rtx, int);
887 /* Register class used for passing given 64bit part of the argument.
888 These represent classes as documented by the PS ABI, with the exception
889 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
890 use SF or DFmode move instead of DImode to avoid reformatting penalties.
892 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
893 whenever possible (upper half does contain padding).
895 enum x86_64_reg_class
898 X86_64_INTEGER_CLASS,
899 X86_64_INTEGERSI_CLASS,
908 static const char * const x86_64_reg_class_name[] =
909 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
911 #define MAX_CLASSES 4
912 static int classify_argument (enum machine_mode, tree,
913 enum x86_64_reg_class [MAX_CLASSES], int);
914 static int examine_argument (enum machine_mode, tree, int, int *, int *);
915 static rtx construct_container (enum machine_mode, tree, int, int, int,
917 static enum x86_64_reg_class merge_classes (enum x86_64_reg_class,
918 enum x86_64_reg_class);
920 /* Table of constants used by fldpi, fldln2, etc... */
921 static REAL_VALUE_TYPE ext_80387_constants_table [5];
922 static bool ext_80387_constants_init = 0;
923 static void init_ext_80387_constants (void);
925 /* Initialize the GCC target structure. */
926 #undef TARGET_ATTRIBUTE_TABLE
927 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
928 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
929 # undef TARGET_MERGE_DECL_ATTRIBUTES
930 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
933 #undef TARGET_COMP_TYPE_ATTRIBUTES
934 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
936 #undef TARGET_INIT_BUILTINS
937 #define TARGET_INIT_BUILTINS ix86_init_builtins
939 #undef TARGET_EXPAND_BUILTIN
940 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
942 #undef TARGET_ASM_FUNCTION_EPILOGUE
943 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
945 #undef TARGET_ASM_OPEN_PAREN
946 #define TARGET_ASM_OPEN_PAREN ""
947 #undef TARGET_ASM_CLOSE_PAREN
948 #define TARGET_ASM_CLOSE_PAREN ""
950 #undef TARGET_ASM_ALIGNED_HI_OP
951 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
952 #undef TARGET_ASM_ALIGNED_SI_OP
953 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
955 #undef TARGET_ASM_ALIGNED_DI_OP
956 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
959 #undef TARGET_ASM_UNALIGNED_HI_OP
960 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
961 #undef TARGET_ASM_UNALIGNED_SI_OP
962 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
963 #undef TARGET_ASM_UNALIGNED_DI_OP
964 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
966 #undef TARGET_SCHED_ADJUST_COST
967 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
968 #undef TARGET_SCHED_ISSUE_RATE
969 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
970 #undef TARGET_SCHED_VARIABLE_ISSUE
971 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
972 #undef TARGET_SCHED_INIT
973 #define TARGET_SCHED_INIT ix86_sched_init
974 #undef TARGET_SCHED_REORDER
975 #define TARGET_SCHED_REORDER ix86_sched_reorder
976 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
977 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
978 ia32_use_dfa_pipeline_interface
979 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
980 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
981 ia32_multipass_dfa_lookahead
983 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
984 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
987 #undef TARGET_HAVE_TLS
988 #define TARGET_HAVE_TLS true
990 #undef TARGET_CANNOT_FORCE_CONST_MEM
991 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
993 #undef TARGET_DELEGITIMIZE_ADDRESS
994 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
996 #undef TARGET_MS_BITFIELD_LAYOUT_P
997 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
999 #undef TARGET_ASM_OUTPUT_MI_THUNK
1000 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1001 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1002 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1004 #undef TARGET_ASM_FILE_START
1005 #define TARGET_ASM_FILE_START x86_file_start
1007 #undef TARGET_RTX_COSTS
1008 #define TARGET_RTX_COSTS ix86_rtx_costs
1009 #undef TARGET_ADDRESS_COST
1010 #define TARGET_ADDRESS_COST ix86_address_cost
1012 #undef TARGET_MACHINE_DEPENDENT_REORG
1013 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1015 struct gcc_target targetm = TARGET_INITIALIZER;
1017 /* The svr4 ABI for the i386 says that records and unions are returned
1019 #ifndef DEFAULT_PCC_STRUCT_RETURN
1020 #define DEFAULT_PCC_STRUCT_RETURN 1
1023 /* Sometimes certain combinations of command options do not make
1024 sense on a particular target machine. You can define a macro
1025 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1026 defined, is executed once just after all the command options have
1029 Don't use this macro to turn on various extra optimizations for
1030 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
/* NOTE(review): the embedded original line numbers below are
   non-contiguous, so this extraction has dropped lines (braces,
   else-arms, declarations).  Code is kept byte-identical; comments
   only annotate what is visible.  */
1033 override_options (void)
1036 /* Comes from final.c -- no real reason to change it. */
1037 #define MAX_CODE_ALIGN 16
/* Per-processor tuning table: cost model pointer, target flags to
   force on/off, and default loop/jump/function alignment parameters,
   indexed by processor_type.  */
1041 const struct processor_costs *cost; /* Processor costs */
1042 const int target_enable; /* Target flags to enable. */
1043 const int target_disable; /* Target flags to disable. */
1044 const int align_loop; /* Default alignments. */
1045 const int align_loop_max_skip;
1046 const int align_jump;
1047 const int align_jump_max_skip;
1048 const int align_func;
1050 const processor_target_table[PROCESSOR_max] =
1052 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1053 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1054 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1055 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1056 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1057 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1058 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1059 {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
1062 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
/* Alias table mapping -march=/-mtune= CPU names to a processor_type
   plus the ISA-extension flags (PTA_*) that CPU implies.  */
1065 const char *const name; /* processor name or nickname. */
1066 const enum processor_type processor;
1067 const enum pta_flags
1072 PTA_PREFETCH_SSE = 8,
1078 const processor_alias_table[] =
1080 {"i386", PROCESSOR_I386, 0},
1081 {"i486", PROCESSOR_I486, 0},
1082 {"i586", PROCESSOR_PENTIUM, 0},
1083 {"pentium", PROCESSOR_PENTIUM, 0},
1084 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1085 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1086 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1087 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1088 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1089 {"i686", PROCESSOR_PENTIUMPRO, 0},
1090 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1091 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1092 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1093 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
1094 PTA_MMX | PTA_PREFETCH_SSE},
1095 {"k6", PROCESSOR_K6, PTA_MMX},
1096 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1097 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1098 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1100 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1101 | PTA_3DNOW | PTA_3DNOW_A},
1102 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1103 | PTA_3DNOW_A | PTA_SSE},
1104 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1105 | PTA_3DNOW_A | PTA_SSE},
1106 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1107 | PTA_3DNOW_A | PTA_SSE},
1108 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1109 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1112 int const pta_size = ARRAY_SIZE (processor_alias_table);
1114 /* Set the default values for switches whose default depends on TARGET_64BIT
1115 in case they weren't overwritten by command line options. */
/* The sentinel value 2 means "not set on the command line"; it is
   planted by optimization_options below.  */
1118 if (flag_omit_frame_pointer == 2)
1119 flag_omit_frame_pointer = 1;
1120 if (flag_asynchronous_unwind_tables == 2)
1121 flag_asynchronous_unwind_tables = 1;
1122 if (flag_pcc_struct_return == 2)
1123 flag_pcc_struct_return = 0;
1127 if (flag_omit_frame_pointer == 2)
1128 flag_omit_frame_pointer = 0;
1129 if (flag_asynchronous_unwind_tables == 2)
1130 flag_asynchronous_unwind_tables = 0;
1131 if (flag_pcc_struct_return == 2)
1132 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1135 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1136 SUBTARGET_OVERRIDE_OPTIONS;
/* Default -mtune from -march, then both from the configured default.  */
1139 if (!ix86_tune_string && ix86_arch_string)
1140 ix86_tune_string = ix86_arch_string;
1141 if (!ix86_tune_string)
1142 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1143 if (!ix86_arch_string)
1144 ix86_arch_string = TARGET_64BIT ? "k8" : "i386";
/* Parse and validate -mcmodel=.  */
1146 if (ix86_cmodel_string != 0)
1148 if (!strcmp (ix86_cmodel_string, "small"))
1149 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1151 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1152 else if (!strcmp (ix86_cmodel_string, "32"))
1153 ix86_cmodel = CM_32;
1154 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1155 ix86_cmodel = CM_KERNEL;
1156 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1157 ix86_cmodel = CM_MEDIUM;
1158 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1159 ix86_cmodel = CM_LARGE;
1161 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1165 ix86_cmodel = CM_32;
1167 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
/* Parse -masm= (AT&T vs Intel assembler output syntax).  */
1169 if (ix86_asm_string != 0)
1171 if (!strcmp (ix86_asm_string, "intel"))
1172 ix86_asm_dialect = ASM_INTEL;
1173 else if (!strcmp (ix86_asm_string, "att"))
1174 ix86_asm_dialect = ASM_ATT;
1176 error ("bad value (%s) for -masm= switch", ix86_asm_string);
/* Sanity-check the code model against the selected bitness.  */
1178 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1179 error ("code model `%s' not supported in the %s bit mode",
1180 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1181 if (ix86_cmodel == CM_LARGE)
1182 sorry ("code model `large' not supported yet");
1183 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1184 sorry ("%i-bit mode not compiled in",
1185 (target_flags & MASK_64BIT) ? 64 : 32);
/* Resolve -march= through the alias table: pick ix86_arch and turn on
   the implied ISA masks, unless the user set those flags explicitly
   (target_flags_explicit).  */
1187 for (i = 0; i < pta_size; i++)
1188 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1190 ix86_arch = processor_alias_table[i].processor;
1191 /* Default cpu tuning to the architecture. */
1192 ix86_tune = ix86_arch;
1193 if (processor_alias_table[i].flags & PTA_MMX
1194 && !(target_flags_explicit & MASK_MMX))
1195 target_flags |= MASK_MMX;
1196 if (processor_alias_table[i].flags & PTA_3DNOW
1197 && !(target_flags_explicit & MASK_3DNOW))
1198 target_flags |= MASK_3DNOW;
1199 if (processor_alias_table[i].flags & PTA_3DNOW_A
1200 && !(target_flags_explicit & MASK_3DNOW_A))
1201 target_flags |= MASK_3DNOW_A;
1202 if (processor_alias_table[i].flags & PTA_SSE
1203 && !(target_flags_explicit & MASK_SSE))
1204 target_flags |= MASK_SSE;
1205 if (processor_alias_table[i].flags & PTA_SSE2
1206 && !(target_flags_explicit & MASK_SSE2))
1207 target_flags |= MASK_SSE2;
1208 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1209 x86_prefetch_sse = true;
1210 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1211 error ("CPU you selected does not support x86-64 instruction set");
1216 error ("bad value (%s) for -march= switch", ix86_arch_string);
/* Resolve -mtune= through the same alias table.  */
1218 for (i = 0; i < pta_size; i++)
1219 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1221 ix86_tune = processor_alias_table[i].processor;
1222 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1223 error ("CPU you selected does not support x86-64 instruction set");
1226 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1227 x86_prefetch_sse = true;
1229 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
/* Pick the cost model (size_cost when optimizing for size, otherwise
   the tuned processor's table) and fold in its default target flags.  */
1232 ix86_cost = &size_cost;
1234 ix86_cost = processor_target_table[ix86_tune].cost;
1235 target_flags |= processor_target_table[ix86_tune].target_enable;
1236 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1238 /* Arrange to set up i386_stack_locals for all functions. */
1239 init_machine_status = ix86_init_machine_status;
1241 /* Validate -mregparm= value. */
1242 if (ix86_regparm_string)
1244 i = atoi (ix86_regparm_string);
1245 if (i < 0 || i > REGPARM_MAX)
1246 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1252 ix86_regparm = REGPARM_MAX;
1254 /* If the user has provided any of the -malign-* options,
1255 warn and use that value only if -falign-* is not set.
1256 Remove this code in GCC 3.2 or later. */
1257 if (ix86_align_loops_string)
1259 warning ("-malign-loops is obsolete, use -falign-loops");
1260 if (align_loops == 0)
1262 i = atoi (ix86_align_loops_string);
1263 if (i < 0 || i > MAX_CODE_ALIGN)
1264 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1266 align_loops = 1 << i;
1270 if (ix86_align_jumps_string)
1272 warning ("-malign-jumps is obsolete, use -falign-jumps");
1273 if (align_jumps == 0)
1275 i = atoi (ix86_align_jumps_string);
1276 if (i < 0 || i > MAX_CODE_ALIGN)
/* NOTE(review): message says -malign-loops but this is the
   -malign-jumps path -- looks like a copy/paste slip; confirm upstream
   before touching the string.  */
1277 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1279 align_jumps = 1 << i;
1283 if (ix86_align_funcs_string)
1285 warning ("-malign-functions is obsolete, use -falign-functions");
1286 if (align_functions == 0)
1288 i = atoi (ix86_align_funcs_string);
1289 if (i < 0 || i > MAX_CODE_ALIGN)
/* NOTE(review): likewise, message names -malign-loops in the
   -malign-functions path.  */
1290 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1292 align_functions = 1 << i;
1296 /* Default align_* from the processor table. */
1297 if (align_loops == 0)
1299 align_loops = processor_target_table[ix86_tune].align_loop;
1300 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1302 if (align_jumps == 0)
1304 align_jumps = processor_target_table[ix86_tune].align_jump;
1305 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1307 if (align_functions == 0)
1309 align_functions = processor_target_table[ix86_tune].align_func;
1312 /* Validate -mpreferred-stack-boundary= value, or provide default.
1313 The default of 128 bits is for Pentium III's SSE __m128, but we
1314 don't want additional code to keep the stack aligned when
1315 optimizing for code size. */
1316 ix86_preferred_stack_boundary = (optimize_size
1317 ? TARGET_64BIT ? 128 : 32
1319 if (ix86_preferred_stack_boundary_string)
1321 i = atoi (ix86_preferred_stack_boundary_string)_
1322 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1323 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1324 TARGET_64BIT ? 4 : 2);
1326 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1329 /* Validate -mbranch-cost= value, or provide default. */
1330 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1331 if (ix86_branch_cost_string)
1333 i = atoi (ix86_branch_cost_string);
1335 error ("-mbranch-cost=%d is not between 0 and 5", i);
1337 ix86_branch_cost = i;
/* Parse -mtls-dialect= (thread-local storage code sequences).  */
1340 if (ix86_tls_dialect_string)
1342 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1343 ix86_tls_dialect = TLS_DIALECT_GNU;
1344 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1345 ix86_tls_dialect = TLS_DIALECT_SUN;
1347 error ("bad value (%s) for -mtls-dialect= switch",
1348 ix86_tls_dialect_string);
1351 /* Keep nonleaf frame pointers. */
1352 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1353 flag_omit_frame_pointer = 1;
1355 /* If we're doing fast math, we don't care about comparison order
1356 wrt NaNs. This lets us use a shorter comparison sequence. */
1357 if (flag_unsafe_math_optimizations)
1358 target_flags &= ~MASK_IEEE_FP;
1360 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1361 since the insns won't need emulation. */
1362 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1363 target_flags &= ~MASK_NO_FANCY_MATH_387;
1365 /* Turn on SSE2 builtins for -mpni. */
1367 target_flags |= MASK_SSE2;
1369 /* Turn on SSE builtins for -msse2. */
1371 target_flags |= MASK_SSE;
1375 if (TARGET_ALIGN_DOUBLE)
1376 error ("-malign-double makes no sense in the 64bit mode");
1378 error ("-mrtd calling convention not supported in the 64bit mode");
1379 /* Enable by default the SSE and MMX builtins. */
1380 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1381 ix86_fpmath = FPMATH_SSE;
1385 ix86_fpmath = FPMATH_387;
1386 /* i386 ABI does not specify red zone. It still makes sense to use it
1387 when programmer takes care to stack from being destroyed. */
1388 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1389 target_flags |= MASK_NO_RED_ZONE;
/* Parse -mfpmath= (387, sse, or both), falling back to 387 with a
   warning when the requested unit is disabled.  */
1392 if (ix86_fpmath_string != 0)
1394 if (! strcmp (ix86_fpmath_string, "387"))
1395 ix86_fpmath = FPMATH_387;
1396 else if (! strcmp (ix86_fpmath_string, "sse"))
1400 warning ("SSE instruction set disabled, using 387 arithmetics");
1401 ix86_fpmath = FPMATH_387;
1404 ix86_fpmath = FPMATH_SSE;
1406 else if (! strcmp (ix86_fpmath_string, "387,sse")
1407 || ! strcmp (ix86_fpmath_string, "sse,387"))
1411 warning ("SSE instruction set disabled, using 387 arithmetics");
1412 ix86_fpmath = FPMATH_387;
1414 else if (!TARGET_80387)
1416 warning ("387 instruction set disabled, using SSE arithmetics");
1417 ix86_fpmath = FPMATH_SSE;
1420 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1423 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1426 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1430 target_flags |= MASK_MMX;
1431 x86_prefetch_sse = true;
1434 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1437 target_flags |= MASK_MMX;
1438 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1439 extensions it adds. */
1440 if (x86_3dnow_a & (1 << ix86_arch))
1441 target_flags |= MASK_3DNOW_A;
1443 if ((x86_accumulate_outgoing_args & TUNEMASK)
1444 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1446 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1448 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1451 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1452 p = strchr (internal_label_prefix, 'X');
1453 internal_label_prefix_len = p - internal_label_prefix;
/* Set i386-specific defaults for a given -O LEVEL / -Os (SIZE).
   Runs before TARGET_64BIT is known, so bitness-dependent flags are
   parked at the sentinel value 2 and resolved later in
   override_options.  NOTE(review): embedded numbering shows gaps --
   some original lines are missing from this extraction.  */
1459 optimization_options (int level, int size ATTRIBUTE_UNUSED)
1461 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1462 make the problem with not enough registers even worse. */
1463 #ifdef INSN_SCHEDULING
1465 flag_schedule_insns = 0;
1468 /* The default values of these switches depend on the TARGET_64BIT
1469 that is not known at this moment. Mark these values with 2 and
1470 let user the to override these. In case there is no command line option
1471 specifying them, we will set the defaults in override_options. */
1473 flag_omit_frame_pointer = 2;
1474 flag_pcc_struct_return = 2;
1475 flag_asynchronous_unwind_tables = 2;
1478 /* Table of valid machine attributes. */
/* Each row is { name, min_len, max_len, decl_req, type_req,
   fn_type_req, handler } per struct attribute_spec; the NULL row
   terminates the table.  */
1479 const struct attribute_spec ix86_attribute_table[] =
1481 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1482 /* Stdcall attribute says callee is responsible for popping arguments
1483 if they are not variable. */
1484 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1485 /* Fastcall attribute says callee is responsible for popping arguments
1486 if they are not variable. */
1487 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1488 /* Cdecl attribute says the callee is a normal C declaration */
1489 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1490 /* Regparm attribute specifies how many integer arguments are to be
1491 passed in registers. */
1492 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1493 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1494 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1495 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1496 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1498 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1499 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1500 { NULL, 0, 0, false, false, false, NULL }
1503 /* Decide whether we can make a sibling call to a function. DECL is the
1504 declaration of the function being targeted by the call and EXP is the
1505 CALL_EXPR representing the call. */
/* NOTE(review): the embedded numbering has gaps here -- the bodies of
   several early-out tests (presumably the failure returns) were lost
   in extraction.  Code kept byte-identical.  */
1508 ix86_function_ok_for_sibcall (tree decl, tree exp)
1510 /* If we are generating position-independent code, we cannot sibcall
1511 optimize any indirect call, or a direct call to a global function,
1512 as the PLT requires %ebx be live. */
1513 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1516 /* If we are returning floats on the 80387 register stack, we cannot
1517 make a sibcall from a function that doesn't return a float to a
1518 function that does or, conversely, from a function that does return
1519 a float to a function that doesn't; the necessary stack adjustment
1520 would not be executed. */
1521 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1522 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1525 /* If this call is indirect, we'll need to be able to use a call-clobbered
1526 register for the address of the target function. Make sure that all
1527 such registers are not used for passing parameters. */
1528 if (!decl && !TARGET_64BIT)
1532 /* We're looking at the CALL_EXPR, we need the type of the function. */
1533 type = TREE_OPERAND (exp, 0); /* pointer expression */
1534 type = TREE_TYPE (type); /* pointer type */
1535 type = TREE_TYPE (type); /* function type */
1537 if (ix86_function_regparm (type, NULL) >= 3)
1539 /* ??? Need to count the actual number of registers to be used,
1540 not the possible number of registers. Fix later. */
1545 /* Otherwise okay. That also includes certain types of indirect calls. */
1549 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1550 arguments as in struct attribute_spec.handler. */
/* Warns and sets *NO_ADD_ATTRS when the attribute is applied to a
   non-function, and rejects the incompatible fastcall/stdcall and
   fastcall/regparm combinations.  NOTE(review): extraction gaps --
   some braces/returns are missing below.  */
1552 ix86_handle_cdecl_attribute (tree *node, tree name,
1553 tree args ATTRIBUTE_UNUSED,
1554 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1556 if (TREE_CODE (*node) != FUNCTION_TYPE
1557 && TREE_CODE (*node) != METHOD_TYPE
1558 && TREE_CODE (*node) != FIELD_DECL
1559 && TREE_CODE (*node) != TYPE_DECL)
1561 warning ("`%s' attribute only applies to functions",
1562 IDENTIFIER_POINTER (name));
1563 *no_add_attrs = true;
1567 if (is_attribute_p ("fastcall", name))
1569 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1571 error ("fastcall and stdcall attributes are not compatible");
1573 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1575 error ("fastcall and regparm attributes are not compatible");
1578 else if (is_attribute_p ("stdcall", name))
1580 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1582 error ("fastcall and stdcall attributes are not compatible");
/* Attribute is meaningless on 64-bit targets, presumably -- the guard
   leading here is among the dropped lines; confirm against upstream.  */
1589 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1590 *no_add_attrs = true;
1596 /* Handle a "regparm" attribute;
1597 arguments as in struct attribute_spec.handler. */
/* Validates that the single argument is an integer constant no larger
   than REGPARM_MAX and that the type is a function, rejecting the
   fastcall combination.  NOTE(review): extraction gaps below.  */
1599 ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1600 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1602 if (TREE_CODE (*node) != FUNCTION_TYPE
1603 && TREE_CODE (*node) != METHOD_TYPE
1604 && TREE_CODE (*node) != FIELD_DECL
1605 && TREE_CODE (*node) != TYPE_DECL)
1607 warning ("`%s' attribute only applies to functions",
1608 IDENTIFIER_POINTER (name));
1609 *no_add_attrs = true;
1615 cst = TREE_VALUE (args);
1616 if (TREE_CODE (cst) != INTEGER_CST)
1618 warning ("`%s' attribute requires an integer constant argument",
1619 IDENTIFIER_POINTER (name));
1620 *no_add_attrs = true;
1622 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1624 warning ("argument to `%s' attribute larger than %d",
1625 IDENTIFIER_POINTER (name), REGPARM_MAX);
1626 *no_add_attrs = true;
1629 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1631 error ("fastcall and regparm attributes are not compatible");
1638 /* Return 0 if the attributes for two types are incompatible, 1 if they
1639 are compatible, and 2 if they are nearly compatible (which causes a
1640 warning to be generated). */
/* NOTE(review): the "return 0"/"return 1" lines are among those dropped
   by this extraction; code kept byte-identical.  */
1643 ix86_comp_type_attributes (tree type1, tree type2)
1645 /* Check for mismatch of non-default calling convention. */
1646 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1648 if (TREE_CODE (type1) != FUNCTION_TYPE)
1651 /* Check for mismatched fastcall types */
1652 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1653 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1656 /* Check for mismatched return types (cdecl vs stdcall). */
1657 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1658 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1663 /* Return the regparm value for a function with the indicated TYPE and DECL.
1664 DECL may be NULL when calling function indirectly
1665 or considering a libcall. */
1668 ix86_function_regparm (tree type, tree decl)
/* Starts from the global -mregparm default and overrides it with any
   explicit "regparm"/"fastcall" attribute on TYPE.  */
1671 int regparm = ix86_regparm;
1672 bool user_convention = false;
1676 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1679 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1680 user_convention = true;
1683 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1686 user_convention = true;
1689 /* Use register calling convention for local functions when possible. */
1690 if (!TARGET_64BIT && !user_convention && decl
1691 && flag_unit_at_a_time && !profile_flag)
1693 struct cgraph_local_info *i = cgraph_local_info (decl);
1696 /* We can't use regparm(3) for nested functions as these use
1697 static chain pointer in third argument. */
1698 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1708 /* Return true if EAX is live at the start of the function. Used by
1709 ix86_expand_prologue to determine if we need special help before
1710 calling allocate_stack_worker. */
1713 ix86_eax_live_at_start_p (void)
1715 /* Cheat. Don't bother working forward from ix86_function_regparm
1716 to the function type to whether an actual argument is located in
1717 eax. Instead just look at cfg info, which is still close enough
1718 to correct at this point. This gives false positives for broken
1719 functions that might use uninitialized data that happens to be
1720 allocated in eax, but who cares? */
/* Register 0 is EAX in the i386 register numbering.  */
1721 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1724 /* Value is the number of bytes of arguments automatically
1725 popped when returning from a subroutine call.
1726 FUNDECL is the declaration node of the function (as a tree),
1727 FUNTYPE is the data type of the function (as a tree),
1728 or for a library call it is an identifier node for the subroutine name.
1729 SIZE is the number of bytes of arguments passed on the stack.
1731 On the 80386, the RTD insn may be used to pop them if the number
1732 of args is fixed, but if the number is variable then the caller
1733 must pop them all. RTD can't be used for library calls now
1734 because the library is compiled with the Unix compiler.
1735 Use of RTD is a selectable option, since it is incompatible with
1736 standard Unix calling sequences. If the option is not selected,
1737 the caller must always pop the args.
1739 The attribute stdcall is equivalent to RTD on a per module basis. */
/* NOTE(review): extraction gaps -- the "return size"/"return 0" lines
   are missing below; code kept byte-identical.  */
1742 ix86_return_pops_args (tree fundecl, tree funtype, int size)
1744 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1746 /* Cdecl functions override -mrtd, and never pop the stack. */
1747 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1749 /* Stdcall and fastcall functions will pop the stack if not variable args. */
1750 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1751 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
/* A prototype whose last argument is void_type_node has a fixed
   argument count, so the callee may pop.  */
1755 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1756 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1757 == void_type_node)))
1761 /* Lose any fake structure return argument if it is passed on the stack. */
1762 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1765 int nregs = ix86_function_regparm (funtype, fundecl);
1768 return GET_MODE_SIZE (Pmode);
1774 /* Argument support functions. */
1776 /* Return true when register may be used to pass function parameters. */
/* NOTE(review): the 32-bit/64-bit branch structure has dropped lines
   in this extraction; code kept byte-identical.  */
1778 ix86_function_arg_regno_p (int regno)
1782 return (regno < REGPARM_MAX
1783 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1784 if (SSE_REGNO_P (regno) && TARGET_SSE)
1786 /* RAX is used as hidden argument to va_arg functions. */
/* 64-bit case: scan the integer parameter register list.  */
1789 for (i = 0; i < REGPARM_MAX; i++)
1790 if (regno == x86_64_int_parameter_registers[i])
1795 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1796 for a call to a function whose data type is FNTYPE.
1797 For a library call, FNTYPE is 0. */
1800 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1801 tree fntype, /* tree ptr for function decl */
1802 rtx libname, /* SYMBOL_REF of library name or 0 */
1805 static CUMULATIVE_ARGS zero_cum;
1806 tree param, next_param;
/* Optional tracing of the argument-passing setup.  */
1808 if (TARGET_DEBUG_ARG)
1810 fprintf (stderr, "\ninit_cumulative_args (");
1812 fprintf (stderr, "fntype code = %s, ret code = %s",
1813 tree_code_name[(int) TREE_CODE (fntype)],
1814 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1816 fprintf (stderr, "no fntype");
1819 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1824 /* Set up the number of registers to use for passing arguments. */
1826 cum->nregs = ix86_function_regparm (fntype, fndecl);
1828 cum->nregs = ix86_regparm;
1829 cum->sse_nregs = SSE_REGPARM_MAX;
1830 cum->maybe_vaarg = false;
1832 /* Use ecx and edx registers if function has fastcall attribute */
1833 if (fntype && !TARGET_64BIT)
1835 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1843 /* Determine if this function has variable arguments. This is
1844 indicated by the last argument being 'void_type_mode' if there
1845 are no variable arguments. If there are variable arguments, then
1846 we won't pass anything in registers */
1850 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1851 param != 0; param = next_param)
1853 next_param = TREE_CHAIN (param);
1854 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1861 cum->maybe_vaarg = true;
/* An unprototyped function or libcall may be variadic.  */
1865 if ((!fntype && !libname)
1866 || (fntype && !TYPE_ARG_TYPES (fntype)))
1867 cum->maybe_vaarg = 1;
1869 if (TARGET_DEBUG_ARG)
1870 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1875 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
1876 of this code is to classify each 8bytes of incoming argument by the register
1877 class and assign registers accordingly. */
1879 /* Return the union class of CLASS1 and CLASS2.
1880 See the x86-64 PS ABI for details. */
/* Implements the psABI class-merging rules in order; the rules are
   numbered in the comments below.  NOTE(review): some return/brace
   lines are dropped in this extraction.  */
1882 static enum x86_64_reg_class
1883 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
1885 /* Rule #1: If both classes are equal, this is the resulting class. */
1886 if (class1 == class2)
1889 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1891 if (class1 == X86_64_NO_CLASS)
1893 if (class2 == X86_64_NO_CLASS)
1896 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1897 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1898 return X86_64_MEMORY_CLASS;
1900 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
/* INTEGERSI + SSESF both fit in 32 bits, so the narrower INTEGERSI
   result is preserved to allow cheaper SImode moves.  */
1901 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1902 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1903 return X86_64_INTEGERSI_CLASS;
1904 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1905 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1906 return X86_64_INTEGER_CLASS;
1908 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1909 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1910 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1911 return X86_64_MEMORY_CLASS;
1913 /* Rule #6: Otherwise class SSE is used. */
1914 return X86_64_SSE_CLASS;
1917 /* Classify the argument of type TYPE and mode MODE.
1918 CLASSES will be filled by the register class used to pass each word
1919 of the operand. The number of words is returned. In case the parameter
1920 should be passed in memory, 0 is returned. As a special case for zero
1921 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1923 BIT_OFFSET is used internally for handling records and specifies the
1924 offset of the sub-object in bits, modulo 256 to avoid overflow cases.
1926 See the x86-64 PS ABI for details.
/* Classify each eightbyte of an argument of machine mode MODE and tree
   type TYPE into x86-64 register classes, filling CLASSES[0..words-1].
   Returns the number of eightbytes (words), or 0 if the argument must be
   passed in memory.  Zero-sized containers yield 1 with classes[0] ==
   X86_64_NO_CLASS.  BIT_OFFSET is the offset of this (sub)object in bits
   modulo 256, used when recursing into record fields and base classes.
   NOTE(review): many lines are elided from this listing (braces, several
   `return 0` / `return words` paths, the switch over mode for atomic
   types, abort calls); comments below describe only what is visible.  */
1930 classify_argument (enum machine_mode mode, tree type,
1931 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
/* bytes: size of the argument; BLKmode sizes come from the tree type.  */
1934 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1935 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1937 /* Variable sized entities are always passed/returned in memory. */
1941 if (mode != VOIDmode
1942 && MUST_PASS_IN_STACK (mode, type))
1945 if (type && AGGREGATE_TYPE_P (type))
1949 enum x86_64_reg_class subclasses[MAX_CLASSES];
1951 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
/* Start with every eightbyte unclassified; merging refines them.  */
1955 for (i = 0; i < words; i++)
1956 classes[i] = X86_64_NO_CLASS;
1958 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1959 signalize memory class, so handle it as special case. */
1962 classes[0] = X86_64_NO_CLASS;
1966 /* Classify each field of record and merge classes. */
1967 if (TREE_CODE (type) == RECORD_TYPE)
1969 /* For classes first merge in the field of the subclasses. */
1970 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1972 tree bases = TYPE_BINFO_BASETYPES (type);
1973 int n_bases = TREE_VEC_LENGTH (bases);
1976 for (i = 0; i < n_bases; ++i)
1978 tree binfo = TREE_VEC_ELT (bases, i);
/* Base-class offset in bits within the derived record.  */
1980 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1981 tree type = BINFO_TYPE (binfo);
1983 num = classify_argument (TYPE_MODE (type),
1985 (offset + bit_offset) % 256);
1988 for (i = 0; i < num; i++)
/* /8/8: bits -> bytes -> eightbyte index.  */
1990 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1992 merge_classes (subclasses[i], classes[i + pos]);
1996 /* And now merge the fields of structure. */
1997 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1999 if (TREE_CODE (field) == FIELD_DECL)
2003 /* Bitfields are always classified as integer. Handle them
2004 early, since later code would consider them to be
2005 misaligned integers. */
2006 if (DECL_BIT_FIELD (field))
2008 for (i = int_bit_position (field) / 8 / 8;
2009 i < (int_bit_position (field)
2010 + tree_low_cst (DECL_SIZE (field), 0)
2013 merge_classes (X86_64_INTEGER_CLASS,
2018 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2019 TREE_TYPE (field), subclasses,
2020 (int_bit_position (field)
2021 + bit_offset) % 256);
2024 for (i = 0; i < num; i++)
2027 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2029 merge_classes (subclasses[i], classes[i + pos]);
2035 /* Arrays are handled as small records. */
2036 else if (TREE_CODE (type) == ARRAY_TYPE)
/* Classify the element type once, then replicate across the words.  */
2039 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2040 TREE_TYPE (type), subclasses, bit_offset);
2044 /* The partial classes are now full classes. */
2045 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2046 subclasses[0] = X86_64_SSE_CLASS;
2047 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2048 subclasses[0] = X86_64_INTEGER_CLASS;
2050 for (i = 0; i < words; i++)
2051 classes[i] = subclasses[i % num];
2053 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2054 else if (TREE_CODE (type) == UNION_TYPE
2055 || TREE_CODE (type) == QUAL_UNION_TYPE)
2057 /* For classes first merge in the field of the subclasses. */
2058 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2060 tree bases = TYPE_BINFO_BASETYPES (type);
2061 int n_bases = TREE_VEC_LENGTH (bases);
2064 for (i = 0; i < n_bases; ++i)
2066 tree binfo = TREE_VEC_ELT (bases, i);
2068 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2069 tree type = BINFO_TYPE (binfo);
2071 num = classify_argument (TYPE_MODE (type),
2073 (offset + (bit_offset % 64)) % 256);
2076 for (i = 0; i < num; i++)
2078 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2080 merge_classes (subclasses[i], classes[i + pos]);
/* All union members start at offset 0, so merge in place.  */
2084 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2086 if (TREE_CODE (field) == FIELD_DECL)
2089 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2090 TREE_TYPE (field), subclasses,
2094 for (i = 0; i < num; i++)
2095 classes[i] = merge_classes (subclasses[i], classes[i]);
2099 else if (TREE_CODE (type) == SET_TYPE)
/* SET_TYPE classified by size: <=4 bytes INTEGERSI, <=8 INTEGER,
   <=12 INTEGER+INTEGERSI, otherwise two INTEGER eightbytes.  */
2103 classes[0] = X86_64_INTEGERSI_CLASS;
2106 else if (bytes <= 8)
2108 classes[0] = X86_64_INTEGER_CLASS;
2111 else if (bytes <= 12)
2113 classes[0] = X86_64_INTEGER_CLASS;
2114 classes[1] = X86_64_INTEGERSI_CLASS;
2119 classes[0] = X86_64_INTEGER_CLASS;
2120 classes[1] = X86_64_INTEGER_CLASS;
2127 /* Final merger cleanup. */
2128 for (i = 0; i < words; i++)
2130 /* If one class is MEMORY, everything should be passed in
2132 if (classes[i] == X86_64_MEMORY_CLASS)
2135 /* The X86_64_SSEUP_CLASS should be always preceded by
2136 X86_64_SSE_CLASS. */
2137 if (classes[i] == X86_64_SSEUP_CLASS
2138 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2139 classes[i] = X86_64_SSE_CLASS;
2141 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2142 if (classes[i] == X86_64_X87UP_CLASS
2143 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2144 classes[i] = X86_64_SSE_CLASS;
2149 /* Compute alignment needed. We align all types to natural boundaries with
2150 exception of XFmode that is aligned to 64bits. */
2151 if (mode != VOIDmode && mode != BLKmode)
2153 int mode_alignment = GET_MODE_BITSIZE (mode);
2156 mode_alignment = 128;
2157 else if (mode == XCmode)
2158 mode_alignment = 256;
2159 /* Misaligned fields are always returned in memory. */
2160 if (bit_offset % mode_alignment)
2164 /* Classification of atomic types. */
/* NOTE(review): the switch statement dispatching on MODE for the cases
   below is elided from this listing; each case assigns the class(es)
   for scalar/complex/vector modes.  */
2174 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2175 classes[0] = X86_64_INTEGERSI_CLASS;
2177 classes[0] = X86_64_INTEGER_CLASS;
2181 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2184 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2185 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2188 if (!(bit_offset % 64))
2189 classes[0] = X86_64_SSESF_CLASS;
2191 classes[0] = X86_64_SSE_CLASS;
2194 classes[0] = X86_64_SSEDF_CLASS;
2197 classes[0] = X86_64_X87_CLASS;
2198 classes[1] = X86_64_X87UP_CLASS;
2201 classes[0] = X86_64_X87_CLASS;
2202 classes[1] = X86_64_X87UP_CLASS;
2203 classes[2] = X86_64_X87_CLASS;
2204 classes[3] = X86_64_X87UP_CLASS;
2207 classes[0] = X86_64_SSEDF_CLASS;
2208 classes[1] = X86_64_SSEDF_CLASS;
2211 classes[0] = X86_64_SSE_CLASS;
2219 classes[0] = X86_64_SSE_CLASS;
2220 classes[1] = X86_64_SSEUP_CLASS;
2235 /* Examine the argument and return set number of register required in each
2236 class. Return 0 iff parameter should be passed in memory. */
/* Count how many integer and SSE registers an argument of MODE/TYPE needs,
   storing the counts in *INT_NREGS and *SSE_NREGS.  IN_RETURN is nonzero
   when classifying a return value rather than a parameter.  Returns 0 iff
   the argument must be passed in memory.
   NOTE(review): the switch header, per-case increments of the counters,
   and the return statements are elided from this listing -- only the
   case labels survive.  */
2238 examine_argument (enum machine_mode mode, tree type, int in_return,
2239 int *int_nregs, int *sse_nregs)
2241 enum x86_64_reg_class class[MAX_CLASSES];
2242 int n = classify_argument (mode, type, class, 0);
/* Walk the eightbyte classes, tallying register needs per class.  */
2248 for (n--; n >= 0; n--)
2251 case X86_64_INTEGER_CLASS:
2252 case X86_64_INTEGERSI_CLASS:
2255 case X86_64_SSE_CLASS:
2256 case X86_64_SSESF_CLASS:
2257 case X86_64_SSEDF_CLASS:
2260 case X86_64_NO_CLASS:
2261 case X86_64_SSEUP_CLASS:
2263 case X86_64_X87_CLASS:
2264 case X86_64_X87UP_CLASS:
2268 case X86_64_MEMORY_CLASS:
2273 /* Construct container for the argument used by GCC interface. See
2274 FUNCTION_ARG for the detailed description. */
/* Build the RTL container describing where an argument of MODE/TYPE lives:
   a single REG for simple cases, or a PARALLEL of (reg, offset) pairs when
   the value is split across registers.  Returns NULL when the argument
   goes in memory (or, for zero-sized values, when there is nothing to
   pass).  NINTREGS/NSSEREGS are the registers still available; INTREG
   points into the integer-register allocation order.
   NOTE(review): lines are elided throughout (braces, `return NULL`
   statements, the switch headers, some EXPR_LIST offset operands);
   comments describe only what is visible.  */
2276 construct_container (enum machine_mode mode, tree type, int in_return,
2277 int nintregs, int nsseregs, const int * intreg,
2280 enum machine_mode tmpmode;
2282 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2283 enum x86_64_reg_class class[MAX_CLASSES];
2287 int needed_sseregs, needed_intregs;
2288 rtx exp[MAX_CLASSES];
2291 n = classify_argument (mode, type, class, 0);
2292 if (TARGET_DEBUG_ARG)
2295 fprintf (stderr, "Memory class\n");
2298 fprintf (stderr, "Classes:");
2299 for (i = 0; i < n; i++)
2301 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2303 fprintf (stderr, "\n");
/* Bail out to memory when the argument needs more registers than remain.  */
2308 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2310 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2313 /* First construct simple cases. Avoid SCmode, since we want to use
2314 single register to pass this type. */
2315 if (n == 1 && mode != SCmode)
2318 case X86_64_INTEGER_CLASS:
2319 case X86_64_INTEGERSI_CLASS:
2320 return gen_rtx_REG (mode, intreg[0]);
2321 case X86_64_SSE_CLASS:
2322 case X86_64_SSESF_CLASS:
2323 case X86_64_SSEDF_CLASS:
2324 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2325 case X86_64_X87_CLASS:
2326 return gen_rtx_REG (mode, FIRST_STACK_REG);
2327 case X86_64_NO_CLASS:
2328 /* Zero sized array, struct or class. */
/* Two-eightbyte fast paths: whole value in one SSE reg, one x87 reg,
   or a consecutive integer register pair.  */
2333 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
2334 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2336 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2337 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
2338 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2339 && class[1] == X86_64_INTEGER_CLASS
2340 && (mode == CDImode || mode == TImode)
2341 && intreg[0] + 1 == intreg[1])
2342 return gen_rtx_REG (mode, intreg[0]);
2344 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2345 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2346 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
2348 /* Otherwise figure out the entries of the PARALLEL. */
2349 for (i = 0; i < n; i++)
2353 case X86_64_NO_CLASS:
2355 case X86_64_INTEGER_CLASS:
2356 case X86_64_INTEGERSI_CLASS:
2357 /* Merge TImodes on aligned occasions here too. */
/* Partial trailing eightbyte: pick the smallest integer mode that
   covers the remaining bytes.  */
2358 if (i * 8 + 8 > bytes)
2359 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2360 else if (class[i] == X86_64_INTEGERSI_CLASS)
2364 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2365 if (tmpmode == BLKmode)
2367 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2368 gen_rtx_REG (tmpmode, *intreg),
2372 case X86_64_SSESF_CLASS:
2373 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2374 gen_rtx_REG (SFmode,
2375 SSE_REGNO (sse_regno)),
2379 case X86_64_SSEDF_CLASS:
2380 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2381 gen_rtx_REG (DFmode,
2382 SSE_REGNO (sse_regno)),
2386 case X86_64_SSE_CLASS:
/* A following SSEUP eightbyte means this SSE reg holds 16 bytes.  */
2387 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2391 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2392 gen_rtx_REG (tmpmode,
2393 SSE_REGNO (sse_regno)),
2395 if (tmpmode == TImode)
/* Assemble the collected EXPR_LISTs into the final PARALLEL.  */
2403 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2404 for (i = 0; i < nexps; i++)
2405 XVECEXP (ret, 0, i) = exp [i];
2409 /* Update the data in CUM to advance over an argument
2410 of mode MODE and data type TYPE.
2411 (TYPE is null for libcalls where that information may not be available.) */
/* Advance the argument-passing cursor CUM past one argument of MODE/TYPE.
   Updates word counts and the remaining/next integer and SSE register
   bookkeeping.  TYPE is null for libcalls.
   NOTE(review): lines are elided (braces, the TARGET_64BIT/else split,
   fastcall handling, and the statements that reset nregs/regno when
   registers run out); the visible 64-bit path and 32-bit paths below are
   partial.  */
2414 function_arg_advance (CUMULATIVE_ARGS *cum, /* current arg information */
2415 enum machine_mode mode, /* current arg mode */
2416 tree type, /* type of the argument or 0 if lib support */
2417 int named) /* whether or not the argument was named */
2420 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2421 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2423 if (TARGET_DEBUG_ARG)
2425 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2426 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
/* x86-64: consume registers only when the whole argument fits; a
   memory-classed argument just advances the stack word count.  */
2429 int int_nregs, sse_nregs;
2430 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2431 cum->words += words;
2432 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2434 cum->nregs -= int_nregs;
2435 cum->sse_nregs -= sse_nregs;
2436 cum->regno += int_nregs;
2437 cum->sse_regno += sse_nregs;
2440 cum->words += words;
/* 32-bit: TImode SSE arguments consume one SSE register each.  */
2444 if (TARGET_SSE && mode == TImode)
2446 cum->sse_words += words;
2447 cum->sse_nregs -= 1;
2448 cum->sse_regno += 1;
2449 if (cum->sse_nregs <= 0)
2457 cum->words += words;
2458 cum->nregs -= words;
2459 cum->regno += words;
2461 if (cum->nregs <= 0)
2471 /* Define where to put the arguments to a function.
2472 Value is zero to push the argument on the stack,
2473 or a hard register in which to store the argument.
2475 MODE is the argument's machine mode.
2476 TYPE is the data type of the argument (as a tree).
2477 This is null for libcalls where that information may
2479 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2480 the preceding args and about the function being called.
2481 NAMED is nonzero if this argument is a named parameter
2482 (otherwise it is an extra parameter matching an ellipsis). */
/* Decide where the next argument of MODE/TYPE goes: returns a REG (or
   PARALLEL) if it is passed in registers, or zero to push it on the
   stack.  CUM tracks registers consumed by the preceding arguments.
   NOTE(review): lines are elided (braces, the TARGET_64BIT dispatch, the
   fastcall attribute check around lines 2532-2543, and the regno
   adjustments); comments describe only what is visible.  */
2485 function_arg (CUMULATIVE_ARGS *cum, /* current arg information */
2486 enum machine_mode mode, /* current arg mode */
2487 tree type, /* type of the argument or 0 if lib support */
2488 int named) /* != 0 for normal args, == 0 for ... args */
2492 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2493 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2495 /* Handle a hidden AL argument containing number of registers for varargs
2496 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2498 if (mode == VOIDmode)
2501 return GEN_INT (cum->maybe_vaarg
2502 ? (cum->sse_nregs < 0
/* x86-64 path: delegate register splitting to construct_container.  */
2510 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2511 &x86_64_int_parameter_registers [cum->regno],
2516 /* For now, pass fp/complex values on the stack. */
2528 if (words <= cum->nregs)
2530 int regno = cum->regno;
2532 /* Fastcall allocates the first two DWORD (SImode) or
2533 smaller arguments to ECX and EDX. */
2536 if (mode == BLKmode || mode == DImode)
2539 /* ECX not EAX is the first allocated register. */
2543 ret = gen_rtx_REG (mode, regno);
2548 ret = gen_rtx_REG (mode, cum->sse_regno);
2552 if (TARGET_DEBUG_ARG)
2555 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2556 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2559 print_simple_rtl (stderr, ret);
2561 fprintf (stderr, ", stack");
2563 fprintf (stderr, " )\n");
2569 /* A C expression that indicates when an argument must be passed by
2570 reference. If nonzero for an argument, a copy of that argument is
2571 made in memory and a pointer to the argument is passed instead of
2572 the argument itself. The pointer is passed in whatever way is
2573 appropriate for passing a pointer to that type. */
/* Return nonzero when an argument of TYPE must be passed by reference
   (a copy is made and its address passed instead).  The visible test
   forces by-reference passing for variable-sized types, whose size
   int_size_in_bytes reports as -1.
   NOTE(review): the enclosing braces and the return statements are
   elided from this listing.  */
2576 function_arg_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2577 enum machine_mode mode ATTRIBUTE_UNUSED,
2578 tree type, int named ATTRIBUTE_UNUSED)
2583 if (type && int_size_in_bytes (type) == -1)
2585 if (TARGET_DEBUG_ARG)
2586 fprintf (stderr, "function_arg_pass_by_reference\n")
2593 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
/* Recursively test whether TYPE is, or contains, a vector requiring
   128-bit alignment -- used to decide 32-bit argument alignment.
   Records/unions recurse through base classes and fields; arrays recurse
   through the element type.
   NOTE(review): braces and the `return true/false` statements are elided
   from this listing.  */
2596 contains_128bit_aligned_vector_p (tree type)
2598 enum machine_mode mode = TYPE_MODE (type);
/* An SSE-mode type itself counts unless the user lowered its alignment.  */
2599 if (SSE_REG_MODE_P (mode)
2600 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2602 if (TYPE_ALIGN (type) < 128)
2605 if (AGGREGATE_TYPE_P (type))
2607 /* Walk the aggregates recursively. */
2608 if (TREE_CODE (type) == RECORD_TYPE
2609 || TREE_CODE (type) == UNION_TYPE
2610 || TREE_CODE (type) == QUAL_UNION_TYPE)
2614 if (TYPE_BINFO (type) != NULL
2615 && TYPE_BINFO_BASETYPES (type) != NULL)
2617 tree bases = TYPE_BINFO_BASETYPES (type);
2618 int n_bases = TREE_VEC_LENGTH (bases);
2621 for (i = 0; i < n_bases; ++i)
2623 tree binfo = TREE_VEC_ELT (bases, i);
2624 tree type = BINFO_TYPE (binfo);
2626 if (contains_128bit_aligned_vector_p (type))
2630 /* And now merge the fields of structure. */
2631 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2633 if (TREE_CODE (field) == FIELD_DECL
2634 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2638 /* Just for use if some languages passes arrays by value. */
2639 else if (TREE_CODE (type) == ARRAY_TYPE)
2641 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2650 /* Gives the alignment boundary, in bits, of an argument with the
2651 specified mode and type. */
/* Return the alignment boundary, in bits, for an argument of MODE/TYPE:
   the type or mode alignment, clamped up to at least PARM_BOUNDARY, and
   on 32-bit lowered back to PARM_BOUNDARY unless the argument is (or
   contains) a 128-bit-aligned SSE vector.
   NOTE(review): braces, the `if (type)` selector between the two align
   sources, the TARGET_64BIT check, and the return statement are elided
   from this listing.  */
2654 ix86_function_arg_boundary (enum machine_mode mode, tree type)
2658 align = TYPE_ALIGN (type);
2660 align = GET_MODE_ALIGNMENT (mode);
2661 if (align < PARM_BOUNDARY)
2662 align = PARM_BOUNDARY;
2665 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2666 make an exception for SSE modes since these require 128bit
2669 The handling here differs from field_alignment. ICC aligns MMX
2670 arguments to 4 byte boundaries, while structure fields are aligned
2671 to 8 byte boundaries. */
2674 if (!SSE_REG_MODE_P (mode))
2675 align = PARM_BOUNDARY;
2679 if (!contains_128bit_aligned_vector_p (type))
2680 align = PARM_BOUNDARY;
2688 /* Return true if N is a possible register number of function value. */
/* Return true if REGNO can hold a function return value: EAX/RAX (0),
   the first x87 stack register, or XMM0.  The first return is the
   64-bit variant, the second the 32-bit one.
   NOTE(review): the TARGET_64BIT dispatch between the two returns is
   elided here.  Also note the last clause of the 32-bit return repeats
   the unconditional FIRST_FLOAT_REG test two lines above -- it appears
   redundant; verify against the full source before touching it.  */
2690 ix86_function_value_regno_p (int regno)
2694 return ((regno) == 0
2695 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2696 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2698 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2699 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2700 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2703 /* Define how to find the value returned by a function.
2704 VALTYPE is the data type of the value (as a tree).
2705 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2706 otherwise, FUNC is 0. */
/* Return the RTX describing where a value of type VALTYPE is returned.
   On 64-bit, delegate to construct_container over the return-register
   set; on 32-bit, use the single register chosen by ix86_value_regno.
   NOTE(review): braces and the TARGET_64BIT check separating the two
   paths are elided from this listing.  */
2708 ix86_function_value (tree valtype)
2712 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2713 REGPARM_MAX, SSE_REGPARM_MAX,
2714 x86_64_int_return_registers, 0);
2715 /* For zero sized structures, construct_container return NULL, but we need
2716 to keep rest of compiler happy by returning meaningful value. */
2718 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2722 return gen_rtx_REG (TYPE_MODE (valtype),
2723 ix86_value_regno (TYPE_MODE (valtype)));
2726 /* Return false iff type is returned in memory. */
/* Return nonzero iff a value of TYPE must be returned in memory rather
   than in registers.  On 64-bit this is simply whether examine_argument
   fails; the remaining logic is the 32-bit ABI's special cases.
   NOTE(review): many return statements and size comparisons are elided
   from this listing; the surviving comments mark each case.  */
2728 ix86_return_in_memory (tree type)
2730 int needed_intregs, needed_sseregs, size;
2731 enum machine_mode mode = TYPE_MODE (type);
2734 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
2736 if (mode == BLKmode)
2739 size = int_size_in_bytes (type);
/* MS-compatible ABI returns small aggregates (<= 8 bytes) in registers.  */
2741 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
2744 if (VECTOR_MODE_P (mode) || mode == TImode)
2746 /* User-created vectors small enough to fit in EAX. */
2750 /* MMX/3dNow values are returned on the stack, since we've
2751 got to EMMS/FEMMS before returning. */
2755 /* SSE values are returned in XMM0. */
2756 /* ??? Except when it doesn't exist? We have a choice of
2757 either (1) being abi incompatible with a -march switch,
2758 or (2) generating an error here. Given no good solution,
2759 I think the safest thing is one warning. The user won't
2760 be able to use -Werror, but... */
2771 warning ("SSE vector return without SSE enabled "
2785 /* Define how to find the value returned by a library function
2786 assuming the value has mode MODE. */
/* Return the register holding a library-call result of mode MODE.
   NOTE(review): the mode dispatch (a TARGET_64BIT switch over MODE,
   judging by the distinct registers returned) is elided from this
   listing -- only the return statements survive.  The final return
   delegates to ix86_value_regno for the non-64-bit path.  */
2788 ix86_libcall_value (enum machine_mode mode)
2798 return gen_rtx_REG (mode, FIRST_SSE_REG);
2801 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2803 return gen_rtx_REG (mode, 0);
2807 return gen_rtx_REG (mode, ix86_value_regno (mode));
2810 /* Given a mode, return the register to use for a return value. */
/* Given a return-value mode, pick the return register for the 32-bit
   ABI: %st(0) for x87 floats, %xmm0 for 16-byte vectors/TImode, and
   %eax for everything else.
   NOTE(review): braces and the final `return 0;` are elided from this
   listing.  */
2813 ix86_value_regno (enum machine_mode mode)
2815 /* Floating point return values in %st(0). */
2816 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2817 return FIRST_FLOAT_REG;
2818 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
2819 we prevent this case when sse is not available. */
2820 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
2821 return FIRST_SSE_REG;
2822 /* Everything else in %eax. */
2826 /* Create the va_list data type. */
/* Build the tree type for va_list.  32-bit uses a plain char*; 64-bit
   builds the four-field __va_list_tag record (gp_offset, fp_offset,
   overflow_arg_area, reg_save_area) and returns a one-element array of
   it, per the x86-64 psABI.
   NOTE(review): braces, the TARGET_64BIT check guarding the early
   return, and the pointer-type arguments of the f_ovf/f_sav build_decl
   calls are elided from this listing.  */
2829 ix86_build_va_list (void)
2831 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2833 /* For i386 we use plain pointer to argument area. */
2835 return build_pointer_type (char_type_node);
2837 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2838 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2840 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2841 unsigned_type_node);
2842 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2843 unsigned_type_node);
2844 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2846 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
/* Attach each field to the record and chain them in declaration order.  */
2849 DECL_FIELD_CONTEXT (f_gpr) = record;
2850 DECL_FIELD_CONTEXT (f_fpr) = record;
2851 DECL_FIELD_CONTEXT (f_ovf) = record;
2852 DECL_FIELD_CONTEXT (f_sav) = record;
2854 TREE_CHAIN (record) = type_decl;
2855 TYPE_NAME (record) = type_decl;
2856 TYPE_FIELDS (record) = f_gpr;
2857 TREE_CHAIN (f_gpr) = f_fpr;
2858 TREE_CHAIN (f_fpr) = f_ovf;
2859 TREE_CHAIN (f_ovf) = f_sav;
2861 layout_type (record);
2863 /* The correct type is an array type of one element. */
2864 return build_array_type (record, build_index_type (size_zero_node))
2867 /* Perform any actions needed for a function that is receiving a
2868 variable number of arguments.
2872 MODE and TYPE are the mode and type of the current parameter.
2874 PRETEND_SIZE is a variable that should be set to the amount of stack
2875 that must be pushed by the prolog to pretend that our caller pushed
2878 Normally, this macro will push all remaining incoming registers on the
2879 stack and set PRETEND_SIZE to the length of the registers pushed. */
/* Emit prologue code for a varargs function that spills the unconsumed
   integer argument registers into the register save area and, via the
   sse_prologue_save pattern, conditionally spills the SSE argument
   registers (AL at runtime holds how many were used by the caller).
   NOTE(review): many lines are elided here, including the early-exit
   for non-64-bit targets, several local declarations (fntype, stdarg_p,
   set, label, tmp_reg, nsse_reg, i), and intermediate statements;
   comments describe only the visible flow.  */
2882 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
2883 tree type, int *pretend_size ATTRIBUTE_UNUSED,
2886 CUMULATIVE_ARGS next_cum;
2887 rtx save_area = NULL_RTX, mem;
2900 /* Indicate to allocate space on the stack for varargs save area. */
2901 ix86_save_varrargs_registers = 1;
/* The save area holds 16-byte SSE registers, so the frame needs
   128-bit alignment.  */
2903 cfun->stack_alignment_needed = 128;
2905 fntype = TREE_TYPE (current_function_decl);
2906 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2907 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2908 != void_type_node));
2910 /* For varargs, we do not want to skip the dummy va_dcl argument.
2911 For stdargs, we do want to skip the last named argument. */
2914 function_arg_advance (&next_cum, mode, type, 1);
2917 save_area = frame_pointer_rtx;
2919 set = get_varargs_alias_set ();
/* Spill each remaining integer parameter register into its slot.  */
2921 for (i = next_cum.regno; i < ix86_regparm; i++)
2923 mem = gen_rtx_MEM (Pmode,
2924 plus_constant (save_area, i * UNITS_PER_WORD));
2925 set_mem_alias_set (mem, set);
2926 emit_move_insn (mem, gen_rtx_REG (Pmode,
2927 x86_64_int_parameter_registers[i]));
2930 if (next_cum.sse_nregs)
2932 /* Now emit code to save SSE registers. The AX parameter contains number
2933 of SSE parameter registers used to call this function. We use
2934 sse_prologue_save insn template that produces computed jump across
2935 SSE saves. We need some preparation work to get this working. */
2937 label = gen_label_rtx ();
2938 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2940 /* Compute address to jump to :
2941 label - 5*eax + nnamed_sse_arguments*5 */
2942 tmp_reg = gen_reg_rtx (Pmode);
2943 nsse_reg = gen_reg_rtx (Pmode);
/* Zero-extend AL (reg 0, QImode) -- the runtime count of SSE regs used.  */
2944 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2945 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2946 gen_rtx_MULT (Pmode, nsse_reg,
2948 if (next_cum.sse_regno)
2951 gen_rtx_CONST (DImode,
2952 gen_rtx_PLUS (DImode,
2954 GEN_INT (next_cum.sse_regno * 4))));
2956 emit_move_insn (nsse_reg, label_ref);
2957 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2959 /* Compute address of memory block we save into. We always use pointer
2960 pointing 127 bytes after first byte to store - this is needed to keep
2961 instruction size limited by 4 bytes. */
2962 tmp_reg = gen_reg_rtx (Pmode);
2963 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2964 plus_constant (save_area,
2965 8 * REGPARM_MAX + 127)));
2966 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2967 set_mem_alias_set (mem, set);
2968 set_mem_align (mem, BITS_PER_WORD);
2970 /* And finally do the dirty job! */
2971 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2972 GEN_INT (next_cum.sse_regno), label));
2977 /* Implement va_start. */
/* Expand __builtin_va_start: initialize the four __va_list_tag fields
   (gp_offset, fp_offset, overflow_arg_area, reg_save_area) from the
   current function's incoming-argument state.  32-bit targets fall back
   to the generic pointer-bump expansion.
   NOTE(review): braces and the TARGET_64BIT test guarding the early
   return are elided from this listing.  */
2980 ix86_va_start (tree valist, rtx nextarg)
2982 HOST_WIDE_INT words, n_gpr, n_fpr;
2983 tree f_gpr, f_fpr, f_ovf, f_sav;
2984 tree gpr, fpr, ovf, sav, t;
2986 /* Only 64bit target needs something special. */
2989 std_expand_builtin_va_start (valist, nextarg);
/* Look up the four fields of __va_list_tag in declaration order.  */
2993 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2994 f_fpr = TREE_CHAIN (f_gpr);
2995 f_ovf = TREE_CHAIN (f_fpr);
2996 f_sav = TREE_CHAIN (f_ovf);
2998 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2999 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3000 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3001 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3002 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3004 /* Count number of gp and fp argument registers used. */
3005 words = current_function_args_info.words;
3006 n_gpr = current_function_args_info.regno;
3007 n_fpr = current_function_args_info.sse_regno;
3009 if (TARGET_DEBUG_ARG)
3010 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3011 (int) words, (int) n_gpr, (int) n_fpr);
/* gp_offset = 8 bytes per integer register already consumed.  */
3013 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3014 build_int_2 (n_gpr * 8, 0));
3015 TREE_SIDE_EFFECTS (t) = 1;
3016 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
/* fp_offset starts past the integer save slots (8 * REGPARM_MAX) and
   advances 16 bytes per consumed SSE register.  */
3018 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3019 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
3020 TREE_SIDE_EFFECTS (t) = 1;
3021 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3023 /* Find the overflow area. */
3024 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3026 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3027 build_int_2 (words * UNITS_PER_WORD, 0));
3028 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3029 TREE_SIDE_EFFECTS (t) = 1;
3030 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3032 /* Find the register save area.
3033 Prologue of the function save it right above stack frame. */
3034 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3035 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3036 TREE_SIDE_EFFECTS (t) = 1;
3037 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3040 /* Implement va_arg. */
/* Expand __builtin_va_arg for TYPE against VALIST, returning an rtx
   addressing the fetched argument.  On 64-bit: classify the type; if it
   fits in remaining registers, fetch it from the register save area
   (copying through a temporary when the registers are not contiguous in
   the save layout), otherwise fall through to the overflow (stack) area.
   NOTE(review): numerous lines are elided (braces, several local
   declarations -- size, rsize, container, need_temp, i, t assignments --
   and some expression heads); comments describe the visible flow only.  */
3042 ix86_va_arg (tree valist, tree type)
3044 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3045 tree f_gpr, f_fpr, f_ovf, f_sav;
3046 tree gpr, fpr, ovf, sav, t;
3048 rtx lab_false, lab_over = NULL_RTX;
3053 /* Only 64bit target needs something special. */
3056 return std_expand_builtin_va_arg (valist, type);
3059 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3060 f_fpr = TREE_CHAIN (f_gpr);
3061 f_ovf = TREE_CHAIN (f_fpr);
3062 f_sav = TREE_CHAIN (f_ovf);
3064 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3065 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3066 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3067 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3068 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3070 size = int_size_in_bytes (type);
3073 /* Passed by reference. */
/* Variable-sized values arrive as a pointer; fetch the pointer instead.  */
3075 type = build_pointer_type (type);
3076 size = int_size_in_bytes (type);
3078 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3080 container = construct_container (TYPE_MODE (type), type, 0,
3081 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3083 * Pull the value out of the saved registers ...
3086 addr_rtx = gen_reg_rtx (Pmode);
3090 rtx int_addr_rtx, sse_addr_rtx;
3091 int needed_intregs, needed_sseregs;
3094 lab_over = gen_label_rtx ();
3095 lab_false = gen_label_rtx ();
3097 examine_argument (TYPE_MODE (type), type, 0,
3098 &needed_intregs, &needed_sseregs);
/* Over-aligned types cannot be read directly from the save area.  */
3101 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
3102 || TYPE_ALIGN (type) > 128);
3104 /* In case we are passing structure, verify that it is consecutive block
3105 on the register save area. If not we need to do moves. */
3106 if (!need_temp && !REG_P (container))
3108 /* Verify that all registers are strictly consecutive */
3109 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3113 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3115 rtx slot = XVECEXP (container, 0, i);
3116 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3117 || INTVAL (XEXP (slot, 1)) != i * 16)
3125 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3127 rtx slot = XVECEXP (container, 0, i);
3128 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3129 || INTVAL (XEXP (slot, 1)) != i * 8)
3136 int_addr_rtx = addr_rtx;
3137 sse_addr_rtx = addr_rtx;
3141 int_addr_rtx = gen_reg_rtx (Pmode);
3142 sse_addr_rtx = gen_reg_rtx (Pmode);
3144 /* First ensure that we fit completely in registers. */
/* gp_offset/fp_offset past their limits => jump to the stack path.  */
3147 emit_cmp_and_jump_insns (expand_expr
3148 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
3149 GEN_INT ((REGPARM_MAX - needed_intregs +
3150 1) * 8), GE, const1_rtx, SImode,
3155 emit_cmp_and_jump_insns (expand_expr
3156 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
3157 GEN_INT ((SSE_REGPARM_MAX -
3158 needed_sseregs + 1) * 16 +
3159 REGPARM_MAX * 8), GE, const1_rtx,
3160 SImode, 1, lab_false);
3163 /* Compute index to start of area used for integer regs. */
3166 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
3167 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
3168 if (r != int_addr_rtx)
3169 emit_move_insn (int_addr_rtx, r);
3173 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
3174 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
3175 if (r != sse_addr_rtx)
3176 emit_move_insn (sse_addr_rtx, r);
3184 /* Never use the memory itself, as it has the alias set. */
3185 x = XEXP (assign_temp (type, 0, 1, 0), 0);
3186 mem = gen_rtx_MEM (BLKmode, x);
3187 force_operand (x, addr_rtx);
3188 set_mem_alias_set (mem, get_varargs_alias_set ());
3189 set_mem_align (mem, BITS_PER_UNIT);
/* Copy each register slot of the container into the temporary.  */
3191 for (i = 0; i < XVECLEN (container, 0); i++)
3193 rtx slot = XVECEXP (container, 0, i);
3194 rtx reg = XEXP (slot, 0);
3195 enum machine_mode mode = GET_MODE (reg);
3201 if (SSE_REGNO_P (REGNO (reg)))
3203 src_addr = sse_addr_rtx;
3204 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3208 src_addr = int_addr_rtx;
3209 src_offset = REGNO (reg) * 8;
3211 src_mem = gen_rtx_MEM (mode, src_addr);
3212 set_mem_alias_set (src_mem, get_varargs_alias_set ());
3213 src_mem = adjust_address (src_mem, mode, src_offset);
3214 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
3215 emit_move_insn (dest_mem, src_mem);
/* Consume the registers: bump gp_offset/fp_offset past what we read.  */
3222 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3223 build_int_2 (needed_intregs * 8, 0));
3224 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3225 TREE_SIDE_EFFECTS (t) = 1;
3226 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3231 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3232 build_int_2 (needed_sseregs * 16, 0));
3233 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3234 TREE_SIDE_EFFECTS (t) = 1;
3235 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3238 emit_jump_insn (gen_jump (lab_over));
3240 emit_label (lab_false);
3243 /* ... otherwise out of the overflow area. */
3245 /* Care for on-stack alignment if needed. */
3246 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
/* Round the overflow pointer up to the required alignment.  */
3250 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3251 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3252 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3256 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3258 emit_move_insn (addr_rtx, r);
/* Advance overflow_arg_area past the argument just fetched.  */
3261 build (PLUS_EXPR, TREE_TYPE (t), t,
3262 build_int_2 (rsize * UNITS_PER_WORD, 0));
3263 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3264 TREE_SIDE_EFFECTS (t) = 1;
3265 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3268 emit_label (lab_over);
/* By-reference values: addr_rtx holds a pointer to the pointer; load it.  */
3272 r = gen_rtx_MEM (Pmode, addr_rtx);
3273 set_mem_alias_set (r, get_varargs_alias_set ());
3274 emit_move_insn (addr_rtx, r);
/* NOTE(review): this chunk is an elided sampling of i386.c -- return
   types, braces and some guard lines (e.g. the `if (!TARGET_64BIT)'
   tests that normally precede the early `return general_operand'
   calls below) are not visible here.  Code is left byte-identical;
   only comments are added.  */

3280 /* Return nonzero if OP is either a i387 or SSE fp register. */
3282 any_fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3284 return ANY_FP_REG_P (op);
3287 /* Return nonzero if OP is an i387 fp register. */
3289 fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3291 return FP_REG_P (op);
3294 /* Return nonzero if OP is a non-fp register_operand. */
3296 register_and_not_any_fp_reg_operand (rtx op, enum machine_mode mode)
3298 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3301 /* Return nonzero if OP is a register operand other than an
3302 i387 fp register.  (SSE registers are still allowed here,
     unlike register_and_not_any_fp_reg_operand above.) */
3304 register_and_not_fp_reg_operand (rtx op, enum machine_mode mode)
3306 return register_operand (op, mode) && !FP_REG_P (op);
3309 /* Return nonzero if OP is general operand representable on x86_64,
     i.e. any nonimmediate operand, or a constant that fits in the
     sign-extended 32-bit immediate field. */
3312 x86_64_general_operand (rtx op, enum machine_mode mode)
3315 return general_operand (op, mode);
3316 if (nonimmediate_operand (op, mode))
3318 return x86_64_sign_extended_value (op);
3321 /* Return nonzero if OP is general operand representable on x86_64
3322 as either sign extended or zero extended constant. */
3325 x86_64_szext_general_operand (rtx op, enum machine_mode mode)
3328 return general_operand (op, mode);
3329 if (nonimmediate_operand (op, mode))
3331 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3334 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3337 x86_64_nonmemory_operand (rtx op, enum machine_mode mode)
3340 return nonmemory_operand (op, mode);
3341 if (register_operand (op, mode))
3343 return x86_64_sign_extended_value (op);
3346 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns.
     Under -fpic on 64-bit, symbolic constants are rejected because they
     would need GOT-relative addressing rather than an absolute move. */
3349 x86_64_movabs_operand (rtx op, enum machine_mode mode)
3351 if (!TARGET_64BIT || !flag_pic)
3352 return nonmemory_operand (op, mode);
3353 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3355 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3360 /* Return nonzero if OPNUM's MEM should be matched
3361 in movabs* patterns.
     NOTE(review): elided lines hide the local declarations of
     `set'/`mem' and the early `return 0' bodies of the guards below. */
3364 ix86_check_movabs (rtx insn, int opnum)
3368 set = PATTERN (insn);
3369 if (GET_CODE (set) == PARALLEL)
3370 set = XVECEXP (set, 0, 0);
3371 if (GET_CODE (set) != SET)
3373 mem = XEXP (set, opnum);
3374 while (GET_CODE (mem) == SUBREG)
3375 mem = SUBREG_REG (mem);
3376 if (GET_CODE (mem) != MEM)
3378 return (volatile_ok || !MEM_VOLATILE_P (mem));
3381 /* Return nonzero if OP is nonmemory operand representable on x86_64,
     accepting both sign- and zero-extendable 32-bit constants. */
3384 x86_64_szext_nonmemory_operand (rtx op, enum machine_mode mode)
3387 return nonmemory_operand (op, mode);
3388 if (register_operand (op, mode))
3390 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3393 /* Return nonzero if OP is immediate operand representable on x86_64. */
3396 x86_64_immediate_operand (rtx op, enum machine_mode mode)
3399 return immediate_operand (op, mode);
3400 return x86_64_sign_extended_value (op);
3403 /* Return nonzero if OP is immediate operand representable on x86_64
     as a zero-extended (unsigned 32-bit) constant. */
3406 x86_64_zext_immediate_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3408 return x86_64_zero_extended_value (op);
3411 /* Return nonzero if OP is (const_int 1), else return zero. */
3414 const_int_1_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3416 return op == const1_rtx;
3419 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3420 for shift & compare patterns, as shifting by 0 does not change flags),
3421 else return zero. */
3424 const_int_1_31_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3426 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3429 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3430 reference and a constant.  Also accepts the pic-related UNSPECs
     (@GOT, @GOTOFF, @GOTPCREL) wrapped in a CONST.
     NOTE(review): the switch's case labels, braces and `return'
     statements are elided between the numbered lines below. */
3433 symbolic_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3435 switch (GET_CODE (op))
3443 if (GET_CODE (op) == SYMBOL_REF
3444 || GET_CODE (op) == LABEL_REF
3445 || (GET_CODE (op) == UNSPEC
3446 && (XINT (op, 1) == UNSPEC_GOT
3447 || XINT (op, 1) == UNSPEC_GOTOFF
3448 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3450 if (GET_CODE (op) != PLUS
3451 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3455 if (GET_CODE (op) == SYMBOL_REF
3456 || GET_CODE (op) == LABEL_REF)
3458 /* Only @GOTOFF gets offsets. */
3459 if (GET_CODE (op) != UNSPEC
3460 || XINT (op, 1) != UNSPEC_GOTOFF)
3463 op = XVECEXP (op, 0, 0);
3464 if (GET_CODE (op) == SYMBOL_REF
3465 || GET_CODE (op) == LABEL_REF)
3474 /* Return true if the operand contains a @GOT or @GOTOFF reference.
     Used to detect pic-dependent addresses. */
3477 pic_symbolic_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3479 if (GET_CODE (op) != CONST)
3484 if (GET_CODE (op) == UNSPEC
3485 && XINT (op, 1) == UNSPEC_GOTPCREL)
3487 if (GET_CODE (op) == PLUS
3488 && GET_CODE (XEXP (op, 0)) == UNSPEC
3489 && XINT (XEXP (op, 0), 1) == UNSPEC_GOTPCREL)
3494 if (GET_CODE (op) == UNSPEC)
3496 if (GET_CODE (op) != PLUS
3497 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3500 if (GET_CODE (op) == UNSPEC)
3506 /* Return true if OP is a symbolic operand that resolves locally,
     i.e. binds within the current module (no PLT/GOT indirection
     needed).  Internal labels always count as local. */
3509 local_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3511 if (GET_CODE (op) == CONST
3512 && GET_CODE (XEXP (op, 0)) == PLUS
3513 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3514 op = XEXP (XEXP (op, 0), 0);
3516 if (GET_CODE (op) == LABEL_REF)
3519 if (GET_CODE (op) != SYMBOL_REF)
3522 if (SYMBOL_REF_LOCAL_P (op))
3525 /* There is, however, a not insubstantial body of code in the rest of
3526 the compiler that assumes it can just stick the results of
3527 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3528 /* ??? This is a hack.  Should update the body of the compiler to
3529 always create a DECL and invoke targetm.encode_section_info. */
3530 if (strncmp (XSTR (op, 0), internal_label_prefix,
3531 internal_label_prefix_len) == 0)
3537 /* Test for various thread-local symbols.  Returns the TLS access
     model of OP (nonzero) when OP is a TLS SYMBOL_REF. */
3540 tls_symbolic_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3542 if (GET_CODE (op) != SYMBOL_REF)
3544 return SYMBOL_REF_TLS_MODEL (op);
/* Helper: true iff OP is a TLS SYMBOL_REF with access model KIND. */
3548 tls_symbolic_operand_1 (rtx op, enum tls_model kind)
3550 if (GET_CODE (op) != SYMBOL_REF)
3552 return SYMBOL_REF_TLS_MODEL (op) == kind;
/* The four predicates below match one TLS access model each. */
3556 global_dynamic_symbolic_operand (register rtx op,
3557 enum machine_mode mode ATTRIBUTE_UNUSED)
3559 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3563 local_dynamic_symbolic_operand (register rtx op,
3564 enum machine_mode mode ATTRIBUTE_UNUSED)
3566 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3570 initial_exec_symbolic_operand (register rtx op,
3571 enum machine_mode mode ATTRIBUTE_UNUSED)
3573 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3577 local_exec_symbolic_operand (register rtx op,
3578 enum machine_mode mode ATTRIBUTE_UNUSED)
3580 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3583 /* Test for a valid operand for a call instruction.  Don't allow the
3584 arg pointer register or virtual regs since they may decay into
3585 reg + const, which the patterns can't handle. */
3588 call_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3590 /* Disallow indirect through a virtual register.  This leads to
3591 compiler aborts when trying to eliminate them. */
3592 if (GET_CODE (op) == REG
3593 && (op == arg_pointer_rtx
3594 || op == frame_pointer_rtx
3595 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3596 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3599 /* Disallow `call 1234'.  Due to varying assembler lameness this
3600 gets either rejected or translated to `call .+1234'. */
3601 if (GET_CODE (op) == CONST_INT)
3604 /* Explicitly allow SYMBOL_REF even if pic. */
3605 if (GET_CODE (op) == SYMBOL_REF)
3608 /* Otherwise we can allow any general_operand in the address. */
3609 return general_operand (op, Pmode);
3612 /* Test for a valid operand for a sibling call instruction.  Same
3613 restrictions as call_insn_operand on the arg pointer and virtual
3614 regs, but memory operands are not allowed at all (only registers
     and symbols), since the frame is torn down before the jump. */
3617 sibcall_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3619 /* Disallow indirect through a virtual register.  This leads to
3620 compiler aborts when trying to eliminate them. */
3621 if (GET_CODE (op) == REG
3622 && (op == arg_pointer_rtx
3623 || op == frame_pointer_rtx
3624 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3625 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3628 /* Explicitly allow SYMBOL_REF even if pic. */
3629 if (GET_CODE (op) == SYMBOL_REF)
3632 /* Otherwise we can only allow register operands. */
3633 return register_operand (op, Pmode);
/* Match a direct (constant) call target: a SYMBOL_REF, possibly with
   a constant byte offset added. */
3637 constant_call_address_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3639 if (GET_CODE (op) == CONST
3640 && GET_CODE (XEXP (op, 0)) == PLUS
3641 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3642 op = XEXP (XEXP (op, 0), 0);
3643 return GET_CODE (op) == SYMBOL_REF;
3646 /* Match exactly zero and one. */
3649 const0_operand (register rtx op, enum machine_mode mode)
3651 return op == CONST0_RTX (mode);
3655 const1_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3657 return op == const1_rtx;
3660 /* Match 2, 4, or 8.  Used for leal multiplicands (the valid index
     scale factors of an x86 address). */
3663 const248_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3665 return (GET_CODE (op) == CONST_INT
3666 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
/* Small-range CONST_INT predicates, used mainly for SSE/MMX shuffle
   and extract immediates. */
3670 const_0_to_3_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3672 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 4);
3676 const_0_to_7_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3678 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 8);
3682 const_0_to_15_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3684 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 16);
3688 const_0_to_255_operand (register rtx op,
3689 enum machine_mode mode ATTRIBUTE_UNUSED)
3691 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 256);
3695 /* True if this is a constant appropriate for an increment or decrement. */
3698 incdec_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3700 /* On Pentium4, the inc and dec operations cause extra dependency on flag
3701 registers, since carry flag is not set.  So avoid inc/dec there
     unless optimizing for size. */
3702 if (TARGET_PENTIUM4 && !optimize_size)
3704 return op == const1_rtx || op == constm1_rtx;
3707 /* Return nonzero if OP is acceptable as operand of DImode shift.
     NOTE(review): the TARGET_64BIT test selecting between the two
     returns is elided here. */
3711 shiftdi_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3714 return nonimmediate_operand (op, mode);
3716 return register_operand (op, mode);
3719 /* Return false if this is the stack pointer, or any other fake
3720 register eliminable to the stack pointer.  Otherwise, this is
     a register operand.
3723 This is used to prevent esp from being used as an index reg.
3724 Which would only happen in pathological cases. */
3727 reg_no_sp_operand (register rtx op, enum machine_mode mode)
3730 if (GET_CODE (t) == SUBREG)
3732 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3735 return register_operand (op, mode);
/* Return true if OP is an MMX register. */
3739 mmx_reg_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3741 return MMX_REG_P (op);
3744 /* Return false if this is any eliminable register.  Otherwise
     works like general_operand.  Eliminable registers may decay to
     reg+const during register elimination, which the consuming
     patterns cannot handle. */
3748 general_no_elim_operand (register rtx op, enum machine_mode mode)
3751 if (GET_CODE (t) == SUBREG)
3753 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3754 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3755 || t == virtual_stack_dynamic_rtx)
3758 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3759 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3762 return general_operand (op, mode);
3765 /* Return false if this is any eliminable register.  Otherwise
3766 register_operand or const_int. */
3769 nonmemory_no_elim_operand (register rtx op, enum machine_mode mode)
3772 if (GET_CODE (t) == SUBREG)
3774 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3775 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3776 || t == virtual_stack_dynamic_rtx)
3779 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3782 /* Return false if this is any eliminable register or stack register,
3783 otherwise work like register_operand.  The stack pointer cannot be
     used as an index register in an x86 address. */
3786 index_register_operand (register rtx op, enum machine_mode mode)
3789 if (GET_CODE (t) == SUBREG)
3793 if (t == arg_pointer_rtx
3794 || t == frame_pointer_rtx
3795 || t == virtual_incoming_args_rtx
3796 || t == virtual_stack_vars_rtx
3797 || t == virtual_stack_dynamic_rtx
3798 || REGNO (t) == STACK_POINTER_REGNUM)
3801 return general_operand (op, mode);
3804 /* Return true if op is a Q_REGS class register (a/b/c/d, i.e. the
     registers with addressable low byte parts). */
3807 q_regs_operand (register rtx op, enum machine_mode mode)
3809 if (mode != VOIDmode && GET_MODE (op) != mode)
3811 if (GET_CODE (op) == SUBREG)
3812 op = SUBREG_REG (op);
3813 return ANY_QI_REG_P (op);
3816 /* Return true if op is the flags register. */
3819 flags_reg_operand (register rtx op, enum machine_mode mode)
3821 if (mode != VOIDmode && GET_MODE (op) != mode)
3823 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3826 /* Return true if op is a NON_Q_REGS class register. */
3829 non_q_regs_operand (register rtx op, enum machine_mode mode)
3831 if (mode != VOIDmode && GET_MODE (op) != mode)
3833 if (GET_CODE (op) == SUBREG)
3834 op = SUBREG_REG (op);
3835 return NON_QI_REG_P (op);
/* Return true if OP is a memory load of a constant vector whose
   upper elements are all zero (so an SSE scalar load that implicitly
   zeroes the rest of the register produces the same value). */
3839 zero_extended_scalar_load_operand (rtx op,
3840 enum machine_mode mode ATTRIBUTE_UNUSED)
3843 if (GET_CODE (op) != MEM)
3845 op = maybe_get_pool_constant (op);
3848 if (GET_CODE (op) != CONST_VECTOR)
/* NOTE(review): the declaration/assignment of `n_elts' is elided
   between the numbered lines here. */
3851 (GET_MODE_SIZE (GET_MODE (op)) /
3852 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
3853 for (n_elts--; n_elts > 0; n_elts--)
3855 rtx elt = CONST_VECTOR_ELT (op, n_elts);
3856 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
3862 /* Return 1 when OP is operand acceptable for standard SSE move:
     a nonimmediate operand or the all-zeros constant. */
3864 vector_move_operand (rtx op, enum machine_mode mode)
3866 if (nonimmediate_operand (op, mode))
3868 if (GET_MODE (op) != mode && mode != VOIDmode)
3870 return (op == CONST0_RTX (GET_MODE (op)));
3873 /* Return true if op is a valid address, and does not contain
3874 a segment override. */
3877 no_seg_address_operand (register rtx op, enum machine_mode mode)
3879 struct ix86_address parts;
3881 if (! address_operand (op, mode))
3884 if (! ix86_decompose_address (op, &parts))
3887 return parts.seg == SEG_DEFAULT;
3890 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
     instructions.
     NOTE(review): the switch statement's case labels are elided here. */
3893 sse_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3895 enum rtx_code code = GET_CODE (op);
3898 /* Operations supported directly. */
3908 /* These are equivalent to ones above in non-IEEE comparisons. */
3915 return !TARGET_IEEE_FP;
3920 /* Return 1 if OP is a valid comparison operator in valid mode.
     The set of acceptable rtx codes depends on the CC mode of the
     flags operand (CCFP/CCFPU for fp compares, CC/CCGC/CCGOC/CCNO
     for integer compares).
     NOTE(review): the switch's case labels and several return
     statements are elided between the numbered lines below. */
3922 ix86_comparison_operator (register rtx op, enum machine_mode mode)
3924 enum machine_mode inmode;
3925 enum rtx_code code = GET_CODE (op);
3926 if (mode != VOIDmode && GET_MODE (op) != mode)
3928 if (GET_RTX_CLASS (code) != '<')
3930 inmode = GET_MODE (XEXP (op, 0));
3932 if (inmode == CCFPmode || inmode == CCFPUmode)
3934 enum rtx_code second_code, bypass_code;
3935 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
/* Valid only when the fp compare needs no helper jumps. */
3936 return (bypass_code == NIL && second_code == NIL);
3943 if (inmode == CCmode || inmode == CCGCmode
3944 || inmode == CCGOCmode || inmode == CCNOmode)
3947 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3948 if (inmode == CCmode)
3952 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3960 /* Return 1 if OP is a valid comparison operator testing carry flag
     to be set.  OP must compare the flags register against zero.
     NOTE(review): the final switch mapping CODE to a result is elided
     between the numbered lines below.  Fix applied: the flags hard
     register was tested with the magic number 17; use the FLAGS_REG
     macro instead, consistent with flags_reg_operand above. */
3963 ix86_carry_flag_operator (register rtx op, enum machine_mode mode)
3965 enum machine_mode inmode;
3966 enum rtx_code code = GET_CODE (op);
3968 if (mode != VOIDmode && GET_MODE (op) != mode)
3970 if (GET_RTX_CLASS (code) != '<')
3972 inmode = GET_MODE (XEXP (op, 0));
3973 if (GET_CODE (XEXP (op, 0)) != REG
3974 || REGNO (XEXP (op, 0)) != FLAGS_REG
3975 || XEXP (op, 1) != const0_rtx)
3978 if (inmode == CCFPmode || inmode == CCFPUmode)
3980 enum rtx_code second_code, bypass_code;
3982 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3983 if (bypass_code != NIL || second_code != NIL)
/* Translate the fp compare code into the integer code actually
   produced on the flags. */
3985 code = ix86_fp_compare_code_to_integer (code);
3987 else if (inmode != CCmode)
3992 /* Return 1 if OP is a comparison operator that can be issued by fcmov.
     NOTE(review): several case labels and return statements of the
     final switch are elided between the numbered lines below. */
3995 fcmov_comparison_operator (register rtx op, enum machine_mode mode)
3997 enum machine_mode inmode;
3998 enum rtx_code code = GET_CODE (op);
4000 if (mode != VOIDmode && GET_MODE (op) != mode)
4002 if (GET_RTX_CLASS (code) != '<')
4004 inmode = GET_MODE (XEXP (op, 0));
4005 if (inmode == CCFPmode || inmode == CCFPUmode)
4007 enum rtx_code second_code, bypass_code;
4009 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4010 if (bypass_code != NIL || second_code != NIL)
/* Translate fp compare code into the integer code seen on flags. */
4012 code = ix86_fp_compare_code_to_integer (code);
4014 /* i387 supports just limited amount of conditional codes. */
4017 case LTU: case GTU: case LEU: case GEU:
4018 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
4021 case ORDERED: case UNORDERED:
4029 /* Return 1 if OP is a binary operator that can be promoted to wider mode.
     NOTE(review): the switch's case labels are elided here. */
4032 promotable_binary_operator (register rtx op,
4033 enum machine_mode mode ATTRIBUTE_UNUSED)
4035 switch (GET_CODE (op))
4038 /* Modern CPUs have same latency for HImode and SImode multiply,
4039 but 386 and 486 do HImode multiply faster. */
4040 return ix86_tune > PROCESSOR_I486;
4052 /* Nearly general operand, but accept any const_double, since we wish
4053 to be able to drop them into memory rather than have them get pulled
     into a register for fp compares. */
4057 cmp_fp_expander_operand (register rtx op, enum machine_mode mode)
4059 if (mode != VOIDmode && mode != GET_MODE (op))
4061 if (GET_CODE (op) == CONST_DOUBLE)
4063 return general_operand (op, mode)
;
4066 /* Match an SI or HImode register for a zero_extract.  (DImode is
     also accepted on 64-bit targets.) */
4069 ext_register_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4072 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
4073 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
4076 if (!register_operand (op, VOIDmode))
4079 /* Be careful to accept only registers having upper parts
     (hard regs 0..3, i.e. a/b/c/d), or still-unallocated pseudos. */
4080 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
4081 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
4084 /* Return 1 if this is a valid binary floating-point operation.
4085 OP is the expression matched, and MODE is its mode. */
4088 binary_fp_operator (register rtx op, enum machine_mode mode)
4090 if (mode != VOIDmode && mode != GET_MODE (op))
4093 switch (GET_CODE (op))
4099 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
/* Match a MULT rtx. */
4107 mult_operator (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4109 return GET_CODE (op) == MULT;
/* Match a DIV rtx. */
4113 div_operator (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4115 return GET_CODE (op) == DIV;
/* Match any commutative ('c') or ordinary binary ('2') operator. */
4119 arith_or_logical_operator (rtx op, enum machine_mode mode)
4121 return ((mode == VOIDmode || GET_MODE (op) == mode)
4122 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
4123 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
4126 /* Returns 1 if OP is memory operand with a displacement. */
4129 memory_displacement_operand (register rtx op, enum machine_mode mode)
4131 struct ix86_address parts;
4133 if (! memory_operand (op, mode))
4136 if (! ix86_decompose_address (XEXP (op, 0), &parts))
4139 return parts.disp != NULL_RTX;
4142 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
4143 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4145 ??? It seems likely that this will only work because cmpsi is an
4146 expander, and no actual insns use this. */
4149 cmpsi_operand (rtx op, enum machine_mode mode)
4151 if (nonimmediate_operand (op, mode))
/* Also accept (and:SI (zero_extract:SI x 8 8) const_int), the form
   produced for a test of the high byte of a QImode value. */
4154 if (GET_CODE (op) == AND
4155 && GET_MODE (op) == SImode
4156 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
4157 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
4158 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
4159 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
4160 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
4161 && GET_CODE (XEXP (op, 1)) == CONST_INT
)
4167 /* Returns 1 if OP is memory operand that can not be represented by the
     short (no-displacement) encoding. */
4171 long_memory_operand (register rtx op, enum machine_mode mode)
4173 if (! memory_operand (op, mode))
4176 return memory_address_length (op) != 0;
4179 /* Return nonzero if the rtx is known aligned (to a 4-byte boundary). */
4182 aligned_operand (rtx op, enum machine_mode mode)
4184 struct ix86_address parts;
4186 if (!general_operand (op, mode))
4189 /* Registers and immediate operands are always "aligned". */
4190 if (GET_CODE (op) != MEM)
4193 /* Don't even try to do any aligned optimizations with volatiles. */
4194 if (MEM_VOLATILE_P (op))
4199 /* Pushes and pops are only valid on the stack pointer. */
4200 if (GET_CODE (op) == PRE_DEC
4201 || GET_CODE (op) == POST_INC)
4204 /* Decode the address. */
4205 if (! ix86_decompose_address (op, &parts))
4208 if (parts.base && GET_CODE (parts.base) == SUBREG)
4209 parts.base = SUBREG_REG (parts.base);
4210 if (parts.index && GET_CODE (parts.index) == SUBREG)
4211 parts.index = SUBREG_REG (parts.index);
4213 /* Look for some component that isn't known to be aligned. */
4217 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
4222 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
4227 if (GET_CODE (parts.disp) != CONST_INT
4228 || (INTVAL (parts.disp) & 3) != 0)
4232 /* Didn't find one -- this must be an aligned address. */
4236 /* Initialize the table of extra 80387 mathematical constants --
     the values loadable with the single-instruction fld* opcodes. */
4239 init_ext_80387_constants (void)
4241 static const char * cst[5] =
4243 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4244 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4245 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4246 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4247 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4251 for (i = 0; i < 5; i++)
4253 real_from_string (&ext_80387_constants_table[i], cst[i]);
4254 /* Ensure each constant is rounded to XFmode precision. */
4255 real_convert (&ext_80387_constants_table[i],
4256 TARGET_128BIT_LONG_DOUBLE ? TFmode : XFmode,
4257 &ext_80387_constants_table[i]);
4260 ext_80387_constants_init = 1;
4263 /* Return true if the constant is something that can be loaded with
4264 a special instruction.  Returns a small integer index identifying
     the instruction (1 = fldz, 2 = fld1, 3.. = the fld* table above),
     or 0 if no special instruction exists.
     NOTE(review): index encoding inferred from the comment on
     standard_80387_constant_rtx below -- confirm against full source. */
4267 standard_80387_constant_p (rtx x)
4269 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4272 if (x == CONST0_RTX (GET_MODE (x)))
4274 if (x == CONST1_RTX (GET_MODE (x)))
4277 /* For XFmode constants, try to find a special 80387 instruction on
4278 those CPUs that benefit from them. */
4279 if ((GET_MODE (x) == XFmode || GET_MODE (x) == TFmode)
4280 && x86_ext_80387_constants & TUNEMASK)
4285 if (! ext_80387_constants_init)
4286 init_ext_80387_constants ();
4288 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4289 for (i = 0; i < 5; i++)
4290 if (real_identical (&r, &ext_80387_constants_table[i]))
4297 /* Return the opcode of the special instruction to be used to load
     the constant X.
     NOTE(review): the switch's case labels/strings are elided here. */
4301 standard_80387_constant_opcode (rtx x)
4303 switch (standard_80387_constant_p (x))
4323 /* Return the CONST_DOUBLE representing the 80387 constant that is
4324 loaded by the specified special instruction.  The argument IDX
4325 matches the return value from standard_80387_constant_p. */
4328 standard_80387_constant_rtx (int idx)
4332 if (! ext_80387_constants_init)
4333 init_ext_80387_constants ();
/* NOTE(review): the mapping from IDX to table index `i' is elided
   between the numbered lines here. */
4349 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4350 TARGET_128BIT_LONG_DOUBLE ? TFmode : XFmode);
4353 /* Return 1 if X is FP constant we can load to SSE register w/o using memory
     (i.e. the all-zeros constant, loadable with pxor/xorps). */
4356 standard_sse_constant_p (rtx x)
4358 if (x == const0_rtx)
4360 return (x == CONST0_RTX (GET_MODE (x)));
4363 /* Returns 1 if OP contains a symbol reference anywhere in its
     rtl, found by recursive walk over the expression's format. */
4366 symbolic_reference_mentioned_p (rtx op)
4368 register const char *fmt;
4371 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4374 fmt = GET_RTX_FORMAT (GET_CODE (op));
4375 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
/* 'E' format: a vector of sub-expressions; recurse into each. */
4381 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4382 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
/* 'e' format: a single sub-expression. */
4386 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4393 /* Return 1 if it is appropriate to emit `ret' instructions in the
4394 body of a function.  Do this only if the epilogue is simple, needing a
4395 couple of insns.  Prior to reloading, we can't tell how many registers
4396 must be saved, so return 0 then.  Return 0 if there is no frame
4397 marker to de-allocate.
4399 If NON_SAVING_SETJMP is defined and true, then it is not possible
4400 for the epilogue to be simple, so return 0.  This is a special case
4401 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4402 until final, but jump_optimize may need to know sooner if a
     `return' is OK. */
4406 ix86_can_use_return_insn_p (void)
4408 struct ix86_frame frame;
4410 #ifdef NON_SAVING_SETJMP
4411 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4415 if (! reload_completed || frame_pointer_needed)
4418 /* Don't allow more than 32k pop, since that's all we can do
4419 with one instruction. */
4420 if (current_function_pops_args
4421 && current_function_args_size >= 32768)
4424 ix86_compute_frame_layout (&frame);
4425 return frame.to_allocate == 0 && frame.nregs == 0;
4428 /* Return 1 if VALUE can be stored in the sign extended immediate field
     (the 32-bit immediate of most x86-64 instructions).
     NOTE(review): the switch's case labels and many return statements
     are elided between the numbered lines below. */
4430 x86_64_sign_extended_value (rtx value)
4432 switch (GET_CODE (value))
4434 /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
4435 to be at least 32 and so all acceptable constants are
4436 represented as CONST_INT. */
4438 if (HOST_BITS_PER_WIDE_INT == 32)
4442 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4443 return trunc_int_for_mode (val, SImode) == val;
4447 /* For certain code models, the symbolic references are known to fit.
4448 in CM_SMALL_PIC model we know it fits if it is local to the shared
4449 library.  Don't count TLS SYMBOL_REFs here, since they should fit
4450 only if inside of UNSPEC handled below. */
4452 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4454 /* For certain code models, the code is near as well. */
4456 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4457 || ix86_cmodel == CM_KERNEL);
4459 /* We also may accept the offsetted memory references in certain special
     cases. */
4462 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4463 switch (XINT (XEXP (value, 0), 1))
4465 case UNSPEC_GOTPCREL:
4467 case UNSPEC_GOTNTPOFF:
4473 if (GET_CODE (XEXP (value, 0)) == PLUS)
4475 rtx op1 = XEXP (XEXP (value, 0), 0);
4476 rtx op2 = XEXP (XEXP (value, 0), 1);
4477 HOST_WIDE_INT offset;
4479 if (ix86_cmodel == CM_LARGE)
4481 if (GET_CODE (op2) != CONST_INT)
4483 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4484 switch (GET_CODE (op1))
4487 /* For CM_SMALL assume that latest object is 16MB before
4488 end of 31bits boundary.  We may also accept pretty
4489 large negative constants knowing that all objects are
4490 in the positive half of address space. */
4491 if (ix86_cmodel == CM_SMALL
4492 && offset < 16*1024*1024
4493 && trunc_int_for_mode (offset, SImode) == offset)
4495 /* For CM_KERNEL we know that all objects resist in the
4496 negative half of 32bits address space.  We may not
4497 accept negative offsets, since they may be just off
4498 and we may accept pretty large positive ones. */
4499 if (ix86_cmodel == CM_KERNEL
4501 && trunc_int_for_mode (offset, SImode) == offset)
4505 /* These conditions are similar to SYMBOL_REF ones, just the
4506 constraints for code models differ. */
4507 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4508 && offset < 16*1024*1024
4509 && trunc_int_for_mode (offset, SImode) == offset)
4511 if (ix86_cmodel == CM_KERNEL
4513 && trunc_int_for_mode (offset, SImode) == offset)
4517 switch (XINT (op1, 1))
4522 && trunc_int_for_mode (offset, SImode) == offset)
4536 /* Return 1 if VALUE can be stored in the zero extended immediate field
     (an unsigned 32-bit immediate on x86-64).
     NOTE(review): the switch's case labels and several return
     statements are elided between the numbered lines below. */
4538 x86_64_zero_extended_value (rtx value)
4540 switch (GET_CODE (value))
4543 if (HOST_BITS_PER_WIDE_INT == 32)
4544 return (GET_MODE (value) == VOIDmode
4545 && !CONST_DOUBLE_HIGH (value));
4549 if (HOST_BITS_PER_WIDE_INT == 32)
4550 return INTVAL (value) >= 0;
4552 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4555 /* For certain code models, the symbolic references are known to fit. */
4557 return ix86_cmodel == CM_SMALL;
4559 /* For certain code models, the code is near as well. */
4561 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4563 /* We also may accept the offsetted memory references in certain special
     cases. */
4566 if (GET_CODE (XEXP (value, 0)) == PLUS)
4568 rtx op1 = XEXP (XEXP (value, 0), 0);
4569 rtx op2 = XEXP (XEXP (value, 0), 1);
4571 if (ix86_cmodel == CM_LARGE)
4573 switch (GET_CODE (op1))
4577 /* For small code model we may accept pretty large positive
4578 offsets, since one bit is available for free.  Negative
4579 offsets are limited by the size of NULL pointer area
4580 specified by the ABI. */
4581 if (ix86_cmodel == CM_SMALL
4582 && GET_CODE (op2) == CONST_INT
4583 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4584 && (trunc_int_for_mode (INTVAL (op2), SImode)
4587 /* ??? For the kernel, we may accept adjustment of
4588 -0x10000000, since we know that it will just convert
4589 negative address space to positive, but perhaps this
4590 is not worthwhile. */
4593 /* These conditions are similar to SYMBOL_REF ones, just the
4594 constraints for code models differ. */
4595 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4596 && GET_CODE (op2) == CONST_INT
4597 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4598 && (trunc_int_for_mode (INTVAL (op2), SImode)
4612 /* Value should be nonzero if functions must have frame pointers.
4613 Zero means the frame pointer need not be set up (and parms may
4614 be accessed via the stack pointer) in functions that seem suitable. */
4617 ix86_frame_pointer_required (void)
4619 /* If we accessed previous frames, then the generated code expects
4620 to be able to access the saved ebp value in our frame. */
4621 if (cfun->machine->accesses_prev_frame)
4624 /* Several x86 os'es need a frame pointer for other reasons,
4625 usually pertaining to setjmp. */
4626 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4629 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4630 the frame pointer by default. Turn it back on now if we've not
4631 got a leaf function. */
4632 if (TARGET_OMIT_LEAF_FRAME_POINTER
4633 && (!current_function_is_leaf))
4636 if (current_function_profile)
4642 /* Record that the current function accesses previous call frames. */
/* Setting this flag makes ix86_frame_pointer_required keep a frame
   pointer, so the saved %ebp chain remains accessible.  */
4645 ix86_setup_frame_addresses (void)
4647 cfun->machine->accesses_prev_frame = 1;
4650 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4651 # define USE_HIDDEN_LINKONCE 1
4653 # define USE_HIDDEN_LINKONCE 0
/* Bitmask: bit N is set once the pc-thunk for hard register N has been
   requested (set in output_set_got, consumed by ix86_file_end).  */
4656 static int pic_labels_used;
4658 /* Fills in the label name that should be used for a pc thunk for
4659 the given register. */
/* With hidden/linkonce support the thunk gets a well-known global name so
   identical copies can be merged by the linker; otherwise a file-local
   "LPR<regno>" internal label is generated.  */
4662 get_pc_thunk_name (char name[32], unsigned int regno)
4664 if (USE_HIDDEN_LINKONCE)
4665 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4667 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4671 /* This function generates code for -fpic that loads %ebx with
4672 the return address of the caller and then returns. */
/* File-end hook: for each of the 8 integer registers whose bit is set in
   pic_labels_used, emit a pc thunk.  NOTE(review): several lines (locals,
   braces, continue) are elided in this extract.  */
4675 ix86_file_end (void)
4680 for (regno = 0; regno < 8; ++regno)
4684 if (! ((pic_labels_used >> regno) & 1))
4687 get_pc_thunk_name (name, regno);
4689 if (USE_HIDDEN_LINKONCE)
/* COMDAT path: build a public, static, one-only FUNCTION_DECL and mark
   the symbol .hidden so duplicate thunks merge at link time.  */
4693 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4695 TREE_PUBLIC (decl) = 1;
4696 TREE_STATIC (decl) = 1;
4697 DECL_ONE_ONLY (decl) = 1;
4699 (*targetm.asm_out.unique_section) (decl, 0);
4700 named_section (decl, NULL, 0);
4702 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4703 fputs ("\t.hidden\t", asm_out_file);
4704 assemble_name (asm_out_file, name);
4705 fputc ('\n', asm_out_file);
4706 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4711 ASM_OUTPUT_LABEL (asm_out_file, name);
/* Thunk body: mov (%esp), %reg ; ret -- copies the caller's return
   address (top of stack at call time) into the destination register.  */
4714 xops[0] = gen_rtx_REG (SImode, regno);
4715 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4716 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4717 output_asm_insn ("ret", xops);
4720 if (NEED_INDICATE_EXEC_STACK)
4721 file_end_indicate_exec_stack ();
4724 /* Emit code for the SET_GOT patterns. */
/* Emits assembly that loads the GOT base address into DEST.  Two
   strategies: an inline call/pop (or Mach-O label) sequence, or a call to
   a shared pc thunk when deep branch prediction is enabled.  NOTE(review):
   locals and some branches are elided in this extract.  */
4727 output_set_got (rtx dest)
4732 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4734 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4736 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4739 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4741 output_asm_insn ("call\t%a2", xops);
4744 /* Output the "canonical" label name ("Lxx$pb") here too. This
4745 is what will be referred to by the Mach-O PIC subsystem. */
4746 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4748 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4749 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4752 output_asm_insn ("pop{l}\t%0", xops);
/* Thunk path: record the register so ix86_file_end emits the thunk,
   then call it.  */
4757 get_pc_thunk_name (name, REGNO (dest));
4758 pic_labels_used |= 1 << REGNO (dest);
4760 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4761 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4762 output_asm_insn ("call\t%X2", xops);
4765 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4766 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4767 else if (!TARGET_MACHO)
4768 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4773 /* Generate an "push" pattern for input ARG. */
/* NOTE(review): the function signature line is elided in this extract;
   the body builds a (set (mem (pre_dec sp)) ...) RTX, i.e. a stack push.  */
4778 return gen_rtx_SET (VOIDmode,
4780 gen_rtx_PRE_DEC (Pmode,
4781 stack_pointer_rtx)),
4785 /* Return >= 0 if there is an unused call-clobbered register available
4786 for the entire function. */
/* In a leaf, non-profiled function, scan registers 2..0 for one that is
   never live; callers use it as an alternate PIC register so %ebx need
   not be saved.  Returns INVALID_REGNUM when none qualifies.  */
4789 ix86_select_alt_pic_regnum (void)
4791 if (current_function_is_leaf && !current_function_profile)
4794 for (i = 2; i >= 0; --i)
4795 if (!regs_ever_live[i])
4799 return INVALID_REGNUM;
4802 /* Return 1 if we need to save REGNO. */
/* Special-cases the PIC register (not saved when an alternate PIC regnum
   is available) and, when MAYBE_EH_RETURN, the EH_RETURN data registers.
   NOTE(review): loop header and early returns are elided in this extract.  */
4804 ix86_save_reg (unsigned int regno, int maybe_eh_return)
4806 if (pic_offset_table_rtx
4807 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4808 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4809 || current_function_profile
4810 || current_function_calls_eh_return
4811 || current_function_uses_const_pool))
4813 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4818 if (current_function_calls_eh_return && maybe_eh_return)
4823 unsigned test = EH_RETURN_DATA_REGNO (i);
4824 if (test == INVALID_REGNUM)
/* General rule: save a register iff it is used, call-saved, not fixed,
   and not the hard frame pointer when one is in use.  */
4831 return (regs_ever_live[regno]
4832 && !call_used_regs[regno]
4833 && !fixed_regs[regno]
4834 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4837 /* Return number of registers to be saved on the stack. */
/* Counts hard registers for which ix86_save_reg (regno, true) holds;
   the accumulator and return are elided in this extract.  */
4840 ix86_nsaved_regs (void)
4845 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4846 if (ix86_save_reg (regno, true))
4851 /* Return the offset between two registers, one to be eliminated, and the other
4852 its replacement, at the start of a routine. */
/* All answers are derived from the frame layout computed by
   ix86_compute_frame_layout; offsets are differences of the recorded
   hard-frame-pointer / frame-pointer / stack-pointer offsets.  */
4855 ix86_initial_elimination_offset (int from, int to)
4857 struct ix86_frame frame;
4858 ix86_compute_frame_layout (&frame);
4860 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4861 return frame.hard_frame_pointer_offset;
4862 else if (from == FRAME_POINTER_REGNUM
4863 && to == HARD_FRAME_POINTER_REGNUM)
4864 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
/* Remaining eliminations must target the stack pointer; the abort/error
   arm appears elided in this extract.  */
4867 if (to != STACK_POINTER_REGNUM)
4869 else if (from == ARG_POINTER_REGNUM)
4870 return frame.stack_pointer_offset;
4871 else if (from != FRAME_POINTER_REGNUM)
4874 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4878 /* Fill structure ix86_frame about frame of currently computed function. */
/* Computes register-save counts, alignment padding, va-arg area, outgoing
   argument space, red-zone usage and the final stack/frame pointer
   offsets.  NOTE(review): braces, a few conditions and the debug-output
   guard are elided in this extract; code text below is untouched.  */
4881 ix86_compute_frame_layout (struct ix86_frame *frame)
4883 HOST_WIDE_INT total_size;
4884 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4885 HOST_WIDE_INT offset;
4886 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4887 HOST_WIDE_INT size = get_frame_size ();
4889 frame->nregs = ix86_nsaved_regs ();
4892 /* During reload iteration the amount of registers saved can change.
4893 Recompute the value as needed. Do not recompute when amount of registers
4894 didn't change as reload does mutiple calls to the function and does not
4895 expect the decision to change within single iteration. */
4897 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4899 int count = frame->nregs;
4901 cfun->machine->use_fast_prologue_epilogue_nregs = count;
4902 /* The fast prologue uses move instead of push to save registers. This
4903 is significantly longer, but also executes faster as modern hardware
4904 can execute the moves in parallel, but can't do that for push/pop.
4906 Be careful about choosing what prologue to emit: When function takes
4907 many instructions to execute we may use slow version as well as in
4908 case function is known to be outside hot spot (this is known with
4909 feedback only). Weight the size of function by number of registers
4910 to save as it is cheap to use one or two push instructions but very
4911 slow to use many of them. */
4913 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4914 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4915 || (flag_branch_probabilities
4916 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4917 cfun->machine->use_fast_prologue_epilogue = false;
4919 cfun->machine->use_fast_prologue_epilogue
4920 = !expensive_function_p (count);
4922 if (TARGET_PROLOGUE_USING_MOVE
4923 && cfun->machine->use_fast_prologue_epilogue)
4924 frame->save_regs_using_mov = true;
4926 frame->save_regs_using_mov = false;
4929 /* Skip return address and saved base pointer. */
4930 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4932 frame->hard_frame_pointer_offset = offset;
4934 /* Do some sanity checking of stack_alignment_needed and
4935 preferred_alignment, since i386 port is the only using those features
4936 that may break easily. */
/* NOTE(review): the abort calls paired with these checks are elided.  */
4938 if (size && !stack_alignment_needed)
4940 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4942 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4944 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4947 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4948 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4950 /* Register save area */
4951 offset += frame->nregs * UNITS_PER_WORD;
4954 if (ix86_save_varrargs_registers)
4956 offset += X86_64_VARARGS_SIZE;
4957 frame->va_arg_size = X86_64_VARARGS_SIZE;
4960 frame->va_arg_size = 0;
4962 /* Align start of frame for local function. */
4963 frame->padding1 = ((offset + stack_alignment_needed - 1)
4964 & -stack_alignment_needed) - offset;
4966 offset += frame->padding1;
4968 /* Frame pointer points here. */
4969 frame->frame_pointer_offset = offset;
4973 /* Add outgoing arguments area. Can be skipped if we eliminated
4974 all the function calls as dead code. */
4975 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4977 offset += current_function_outgoing_args_size;
4978 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4981 frame->outgoing_arguments_size = 0;
4983 /* Align stack boundary. Only needed if we're calling another function
4985 if (!current_function_is_leaf || current_function_calls_alloca)
4986 frame->padding2 = ((offset + preferred_alignment - 1)
4987 & -preferred_alignment) - offset;
4989 frame->padding2 = 0;
4991 offset += frame->padding2;
4993 /* We've reached end of stack frame. */
4994 frame->stack_pointer_offset = offset;
4996 /* Size prologue needs to allocate. */
4997 frame->to_allocate =
4998 (size + frame->padding1 + frame->padding2
4999 + frame->outgoing_arguments_size + frame->va_arg_size);
5001 if ((!frame->to_allocate && frame->nregs <= 1)
5002 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5003 frame->save_regs_using_mov = false;
/* Red zone (x86-64 ABI): leaf functions may use the area below %rsp
   without adjusting it; clamp to the usable size.  */
5005 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5006 && current_function_is_leaf)
5008 frame->red_zone_size = frame->to_allocate;
5009 if (frame->save_regs_using_mov)
5010 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5011 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5012 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5015 frame->red_zone_size = 0;
5016 frame->to_allocate -= frame->red_zone_size;
5017 frame->stack_pointer_offset -= frame->red_zone_size;
/* Debug dump of the computed layout; presumably guarded by a debug flag
   whose `if' line is elided in this extract.  */
5019 fprintf (stderr, "nregs: %i\n", frame->nregs);
5020 fprintf (stderr, "size: %i\n", size);
5021 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5022 fprintf (stderr, "padding1: %i\n", frame->padding1);
5023 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5024 fprintf (stderr, "padding2: %i\n", frame->padding2);
5025 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5026 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5027 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5028 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5029 frame->hard_frame_pointer_offset);
5030 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5034 /* Emit code to save registers in the prologue. */
/* Pushes each register chosen by ix86_save_reg, highest regno first, and
   marks each push frame-related for unwind info.  */
5037 ix86_emit_save_regs (void)
5042 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5043 if (ix86_save_reg (regno, true))
5045 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5046 RTX_FRAME_RELATED_P (insn) = 1;
5050 /* Emit code to save registers using MOV insns. First register
5051 is restored from POINTER + OFFSET. */
/* Move-based alternative to push: stores each saved register at
   successive word offsets from POINTER, marking each store
   frame-related.  */
5053 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5058 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5059 if (ix86_save_reg (regno, true))
5061 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5063 gen_rtx_REG (Pmode, regno));
5064 RTX_FRAME_RELATED_P (insn) = 1;
5065 offset += UNITS_PER_WORD;
5069 /* Expand prologue or epilogue stack adjustement.
5070 The pattern exist to put a dependency on all ebp-based memory accesses.
5071 STYLE should be negative if instructions should be marked as frame related,
5072 zero if %r11 register is live and cannot be freely used and positive
/* NOTE(review): the tail of the comment and the 32-bit/64-bit `if'
   conditions are elided in this extract.  */
5076 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5081 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5082 else if (x86_64_immediate_operand (offset, DImode))
5083 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5087 /* r11 is used by indirect sibcall return as well, set before the
5088 epilogue and used after the epilogue. ATM indirect sibcall
5089 shouldn't be used together with huge frame sizes in one
5090 function because of the frame_size check in sibcall.c. */
/* Large 64-bit offsets: materialize the constant in %r11 first, since it
   does not fit a sign-extended 32-bit immediate.  */
5093 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5094 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5096 RTX_FRAME_RELATED_P (insn) = 1;
5097 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5101 RTX_FRAME_RELATED_P (insn) = 1;
5104 /* Expand the prologue into a bunch of separate insns. */
/* Emits: optional %ebp setup, register saves (push or mov), stack
   allocation (plain subtract or stack-probe worker), and PIC register
   setup.  NOTE(review): locals, braces and several guard lines are
   elided in this extract; code text below is untouched.  */
5107 ix86_expand_prologue (void)
5111 struct ix86_frame frame;
5112 HOST_WIDE_INT allocate;
5114 ix86_compute_frame_layout (&frame);
5116 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5117 slower on all targets. Also sdb doesn't like it. */
5119 if (frame_pointer_needed)
5121 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5122 RTX_FRAME_RELATED_P (insn) = 1;
5124 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5125 RTX_FRAME_RELATED_P (insn) = 1;
5128 allocate = frame.to_allocate;
5130 if (!frame.save_regs_using_mov)
5131 ix86_emit_save_regs ();
5133 allocate += frame.nregs * UNITS_PER_WORD;
5135 /* When using red zone we may start register saving before allocating
5136 the stack frame saving one cycle of the prologue. */
5137 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5138 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5139 : stack_pointer_rtx,
5140 -frame.nregs * UNITS_PER_WORD);
5144 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5145 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5146 GEN_INT (-allocate), -1);
/* Large allocations with stack probing: go through the allocate_stack
   worker with the size in %eax, preserving a live %eax around it.  */
5149 /* Only valid for Win32. */
5150 rtx eax = gen_rtx_REG (SImode, 0);
5151 bool eax_live = ix86_eax_live_at_start_p ();
5158 emit_insn (gen_push (eax));
5162 insn = emit_move_insn (eax, GEN_INT (allocate));
5163 RTX_FRAME_RELATED_P (insn) = 1;
5165 insn = emit_insn (gen_allocate_stack_worker (eax));
5166 RTX_FRAME_RELATED_P (insn) = 1;
5170 rtx t = plus_constant (stack_pointer_rtx, allocate);
5171 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5175 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5177 if (!frame_pointer_needed || !frame.to_allocate)
5178 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5180 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5181 -frame.nregs * UNITS_PER_WORD);
5184 pic_reg_used = false;
5185 if (pic_offset_table_rtx
5186 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5187 || current_function_profile))
5189 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5191 if (alt_pic_reg_used != INVALID_REGNUM)
5192 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5194 pic_reg_used = true;
5199 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5201 /* Even with accurate pre-reload life analysis, we can wind up
5202 deleting all references to the pic register after reload.
5203 Consider if cross-jumping unifies two sides of a branch
5204 controlled by a comparison vs the only read from a global.
5205 In which case, allow the set_got to be deleted, though we're
5206 too late to do anything about the ebx save in the prologue. */
5207 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5210 /* Prevent function calls from be scheduled before the call to mcount.
5211 In the pic_reg_used case, make sure that the got load isn't deleted. */
5212 if (current_function_profile)
5213 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5216 /* Emit code to restore saved registers using MOV insns. First register
5217 is restored from POINTER + OFFSET. */
/* Mirror of ix86_emit_save_regs_using_mov: loads each saved register
   from successive word offsets; MAYBE_EH_RETURN selects whether EH data
   registers count as saved.  */
5219 ix86_emit_restore_regs_using_mov (rtx pointer, int offset, int maybe_eh_return)
5223 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5224 if (ix86_save_reg (regno, maybe_eh_return))
5226 emit_move_insn (gen_rtx_REG (Pmode, regno),
5227 adjust_address (gen_rtx_MEM (Pmode, pointer),
5229 offset += UNITS_PER_WORD;
5233 /* Restore function stack, frame, and registers. */
/* STYLE distinguishes normal return, sibcall, and (style == 2) eh_return
   epilogues.  Chooses between mov-based restores plus leave, or
   pop-based restores.  NOTE(review): braces and several lines are elided
   in this extract; code text below is untouched.  */
5236 ix86_expand_epilogue (int style)
5239 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5240 struct ix86_frame frame;
5241 HOST_WIDE_INT offset;
5243 ix86_compute_frame_layout (&frame);
5245 /* Calculate start of saved registers relative to ebp. Special care
5246 must be taken for the normal return case of a function using
5247 eh_return: the eax and edx registers are marked as saved, but not
5248 restored along this path. */
5249 offset = frame.nregs;
5250 if (current_function_calls_eh_return && style != 2)
5252 offset *= -UNITS_PER_WORD;
5254 /* If we're only restoring one register and sp is not valid then
5255 using a move instruction to restore the register since it's
5256 less work than reloading sp and popping the register.
5258 The default code result in stack adjustment using add/lea instruction,
5259 while this code results in LEAVE instruction (or discrete equivalent),
5260 so it is profitable in some other cases as well. Especially when there
5261 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5262 and there is exactly one register to pop. This heuristic may need some
5263 tuning in future. */
5264 if ((!sp_valid && frame.nregs <= 1)
5265 || (TARGET_EPILOGUE_USING_MOVE
5266 && cfun->machine->use_fast_prologue_epilogue
5267 && (frame.nregs > 1 || frame.to_allocate))
5268 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5269 || (frame_pointer_needed && TARGET_USE_LEAVE
5270 && cfun->machine->use_fast_prologue_epilogue
5271 && frame.nregs == 1)
5272 || current_function_calls_eh_return)
5274 /* Restore registers. We can use ebp or esp to address the memory
5275 locations. If both are available, default to ebp, since offsets
5276 are known to be small. Only exception is esp pointing directly to the
5277 end of block of saved registers, where we may simplify addressing
5280 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5281 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5282 frame.to_allocate, style == 2)
5284 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5285 offset, style == 2);
5287 /* eh_return epilogues need %ecx added to the stack pointer. */
5290 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5292 if (frame_pointer_needed)
5294 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5295 tmp = plus_constant (tmp, UNITS_PER_WORD);
5296 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5298 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5299 emit_move_insn (hard_frame_pointer_rtx, tmp);
5301 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5306 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5307 tmp = plus_constant (tmp, (frame.to_allocate
5308 + frame.nregs * UNITS_PER_WORD));
5309 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5312 else if (!frame_pointer_needed)
5313 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5314 GEN_INT (frame.to_allocate
5315 + frame.nregs * UNITS_PER_WORD),
5317 /* If not an i386, mov & pop is faster than "leave". */
5318 else if (TARGET_USE_LEAVE || optimize_size
5319 || !cfun->machine->use_fast_prologue_epilogue)
5320 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5323 pro_epilogue_adjust_stack (stack_pointer_rtx,
5324 hard_frame_pointer_rtx,
5327 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5329 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
/* Pop-based path: deallocate the frame first, then pop registers.  */
5334 /* First step is to deallocate the stack frame so that we can
5335 pop the registers. */
5338 if (!frame_pointer_needed)
5340 pro_epilogue_adjust_stack (stack_pointer_rtx,
5341 hard_frame_pointer_rtx,
5342 GEN_INT (offset), style);
5344 else if (frame.to_allocate)
5345 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5346 GEN_INT (frame.to_allocate), style);
5348 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5349 if (ix86_save_reg (regno, false))
5352 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5354 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5356 if (frame_pointer_needed)
5358 /* Leave results in shorter dependency chains on CPUs that are
5359 able to grok it fast. */
5360 if (TARGET_USE_LEAVE)
5361 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5362 else if (TARGET_64BIT)
5363 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5365 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5369 /* Sibcall epilogues don't want a return instruction. */
5373 if (current_function_pops_args && current_function_args_size)
5375 rtx popc = GEN_INT (current_function_pops_args);
5377 /* i386 can only pop 64K bytes. If asked to pop more, pop
5378 return address, do explicit add, and jump indirectly to the
5381 if (current_function_pops_args >= 65536)
5383 rtx ecx = gen_rtx_REG (SImode, 2);
5385 /* There is no "pascal" calling convention in 64bit ABI. */
5389 emit_insn (gen_popsi1 (ecx));
5390 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5391 emit_jump_insn (gen_return_indirect_internal (ecx));
5394 emit_jump_insn (gen_return_pop_internal (popc));
5397 emit_jump_insn (gen_return_internal ());
5400 /* Reset from the function's potential modifications. */
/* Undoes ix86_expand_prologue's possible renumbering of the PIC register
   to an alternate regnum, restoring the canonical PIC regnum.  */
5403 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5404 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5406 if (pic_offset_table_rtx)
5407 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5410 /* Extract the parts of an RTL expression that is a valid memory address
5411 for an instruction. Return 0 if the structure of the address is
5412 grossly off. Return -1 if the address contains ASHIFT, so it is not
5413 strictly valid, but still used for computing length of lea instruction. */
/* Splits ADDR into base, index, scale, displacement and segment, then
   applies x86 encoding fixups.  NOTE(review): the PLUS-addend
   classification switch is heavily elided in this extract.  */
5416 ix86_decompose_address (register rtx addr, struct ix86_address *out)
5418 rtx base = NULL_RTX;
5419 rtx index = NULL_RTX;
5420 rtx disp = NULL_RTX;
5421 HOST_WIDE_INT scale = 1;
5422 rtx scale_rtx = NULL_RTX;
5424 enum ix86_address_seg seg = SEG_DEFAULT;
5426 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
5428 else if (GET_CODE (addr) == PLUS)
/* Flatten the nested PLUS chain into the addends[] array.  */
5438 addends[n++] = XEXP (op, 1);
5441 while (GET_CODE (op) == PLUS);
5446 for (i = n; i >= 0; --i)
5449 switch (GET_CODE (op))
5454 index = XEXP (op, 0);
5455 scale_rtx = XEXP (op, 1);
/* A TP unspec addend becomes an %fs/%gs segment override when
   direct TLS segment references are enabled.  */
5459 if (XINT (op, 1) == UNSPEC_TP
5460 && TARGET_TLS_DIRECT_SEG_REFS
5461 && seg == SEG_DEFAULT)
5462 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5491 else if (GET_CODE (addr) == MULT)
5493 index = XEXP (addr, 0); /* index*scale */
5494 scale_rtx = XEXP (addr, 1);
5496 else if (GET_CODE (addr) == ASHIFT)
5500 /* We're called for lea too, which implements ashift on occasion. */
5501 index = XEXP (addr, 0);
5502 tmp = XEXP (addr, 1);
5503 if (GET_CODE (tmp) != CONST_INT)
5505 scale = INTVAL (tmp);
5506 if ((unsigned HOST_WIDE_INT) scale > 3)
5512 disp = addr; /* displacement */
5514 /* Extract the integral value of scale. */
5517 if (GET_CODE (scale_rtx) != CONST_INT)
5519 scale = INTVAL (scale_rtx);
5522 /* Allow arg pointer and stack pointer as index if there is not scaling. */
/* %esp cannot be an index in the encoding; swap it into the base slot.  */
5523 if (base && index && scale == 1
5524 && (index == arg_pointer_rtx
5525 || index == frame_pointer_rtx
5526 || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
5533 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5534 if ((base == hard_frame_pointer_rtx
5535 || base == frame_pointer_rtx
5536 || base == arg_pointer_rtx) && !disp)
5539 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5540 Avoid this by transforming to [%esi+0]. */
5541 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5542 && base && !index && !disp
5544 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5547 /* Special case: encode reg+reg instead of reg*2. */
5548 if (!base && index && scale && scale == 2)
5549 base = index, scale = 1;
5551 /* Special case: scaling cannot be encoded without base or displacement. */
5552 if (!base && !disp && index && scale != 1)
5564 /* Return cost of the memory address x.
5565 For i386, it is better to use a complex address than let gcc copy
5566 the address into a reg and make a new pseudo. But not if the address
5567 requires to two regs - that would mean more pseudos with longer
/* NOTE(review): the comment tail, the cost accumulator initialization
   and the final return are elided in this extract.  */
5570 ix86_address_cost (rtx x)
5572 struct ix86_address parts;
5575 if (!ix86_decompose_address (x, &parts))
5578 if (parts.base && GET_CODE (parts.base) == SUBREG)
5579 parts.base = SUBREG_REG (parts.base);
5580 if (parts.index && GET_CODE (parts.index) == SUBREG)
5581 parts.index = SUBREG_REG (parts.index);
5583 /* More complex memory references are better. */
5584 if (parts.disp && parts.disp != const0_rtx)
5586 if (parts.seg != SEG_DEFAULT)
5589 /* Attempt to minimize number of registers in the address. */
5591 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5593 && (!REG_P (parts.index)
5594 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5598 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5600 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5601 && parts.base != parts.index)
5604 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
5605 since it's predecode logic can't detect the length of instructions
5606 and it degenerates to vector decoded. Increase cost of such
5607 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5608 to split such addresses or even refuse such addresses at all.
5610 Following addressing modes are affected:
5615 The first and last case may be avoidable by explicitly coding the zero in
5616 memory address, but I don't have AMD-K6 machine handy to check this
5620 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5621 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5622 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5628 /* If X is a machine specific address (i.e. a symbol or label being
5629 referenced as a displacement from the GOT implemented using an
5630 UNSPEC), then return the base term. Otherwise return X. */
/* 64-bit path: peel CONST/PLUS to find an UNSPEC_GOTPCREL and return
   its symbol; otherwise delegitimize.  NOTE(review): the TARGET_64BIT
   guard and fallthrough returns are elided in this extract.  */
5633 ix86_find_base_term (rtx x)
5639 if (GET_CODE (x) != CONST)
5642 if (GET_CODE (term) == PLUS
5643 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5644 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5645 term = XEXP (term, 0);
5646 if (GET_CODE (term) != UNSPEC
5647 || XINT (term, 1) != UNSPEC_GOTPCREL)
5650 term = XVECEXP (term, 0, 0);
5652 if (GET_CODE (term) != SYMBOL_REF
5653 && GET_CODE (term) != LABEL_REF)
5659 term = ix86_delegitimize_address (x);
5661 if (GET_CODE (term) != SYMBOL_REF
5662 && GET_CODE (term) != LABEL_REF)
5668 /* Determine if a given RTX is a valid constant. We already know this
5669 satisfies CONSTANT_P. */
/* Rejects TLS symbols and TLS-symbol offsets; inside CONST, only certain
   TLS unspecs (TPOFF/DTPOFF families) are accepted as constants.
   NOTE(review): several case labels and returns are elided here.  */
5672 legitimate_constant_p (rtx x)
5676 switch (GET_CODE (x))
5679 /* TLS symbols are not constant. */
5680 if (tls_symbolic_operand (x, Pmode))
5685 inner = XEXP (x, 0);
5687 /* Offsets of TLS symbols are never valid.
5688 Discourage CSE from creating them. */
5689 if (GET_CODE (inner) == PLUS
5690 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5693 if (GET_CODE (inner) == PLUS)
5695 if (GET_CODE (XEXP (inner, 1)) != CONST_INT)
5697 inner = XEXP (inner, 0);
5700 /* Only some unspecs are valid as "constants". */
5701 if (GET_CODE (inner) == UNSPEC)
5702 switch (XINT (inner, 1))
5706 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5708 return local_dynamic_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5718 /* Otherwise we handle everything else in the move patterns. */
5722 /* Determine if it's legal to put X into the constant pool. This
5723 is not possible for the address of thread-local symbols, which
5724 is checked above. */
/* Inverse of legitimate_constant_p: anything not a legitimate constant
   (e.g. TLS addresses) must not be forced into the constant pool.  */
5727 ix86_cannot_force_const_mem (rtx x)
5729 return !legitimate_constant_p (x);
5732 /* Determine if a given RTX is a valid constant address. */
/* A constant address is any CONSTANT_P rtx that also passes the strict
   address legitimacy check in Pmode.  */
5735 constant_address_p (rtx x)
5737 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5740 /* Nonzero if the constant value X is a legitimate general operand
5741 when generating PIC code. It is given that flag_pic is on and
5742 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
/* CONST wrappers are accepted only for specific TLS unspecs; symbolic
   operands defer to legitimate_pic_address_disp_p.  NOTE(review): case
   labels and the default return are elided in this extract.  */
5745 legitimate_pic_operand_p (rtx x)
5749 switch (GET_CODE (x))
5752 inner = XEXP (x, 0);
5754 /* Only some unspecs are valid as "constants". */
5755 if (GET_CODE (inner) == UNSPEC)
5756 switch (XINT (inner, 1))
5759 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5767 return legitimate_pic_address_disp_p (x);
5774 /* Determine if a given CONST RTX is a valid memory displacement
/* Validates PIC displacements: 64-bit RIP-relative symbols/labels
   (with bounded +/-16MB offsets) and GOTPCREL, Mach-O picbase MINUS
   forms, and the GOT/GOTOFF/TLS unspec family.  NOTE(review): several
   guard lines and returns are elided in this extract.  */
5778 legitimate_pic_address_disp_p (register rtx disp)
5782 /* In 64bit mode we can allow direct addresses of symbols and labels
5783 when they are not dynamic symbols. */
5786 /* TLS references should always be enclosed in UNSPEC. */
5787 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5789 if (GET_CODE (disp) == SYMBOL_REF
5790 && ix86_cmodel == CM_SMALL_PIC
5791 && SYMBOL_REF_LOCAL_P (disp))
5793 if (GET_CODE (disp) == LABEL_REF)
5795 if (GET_CODE (disp) == CONST
5796 && GET_CODE (XEXP (disp, 0)) == PLUS)
5798 rtx op0 = XEXP (XEXP (disp, 0), 0);
5799 rtx op1 = XEXP (XEXP (disp, 0), 1);
5801 /* TLS references should always be enclosed in UNSPEC. */
5802 if (tls_symbolic_operand (op0, GET_MODE (op0)))
/* Symbol+offset is RIP-addressable only while the offset stays within
   +/-16MB of the symbol.  */
5804 if (((GET_CODE (op0) == SYMBOL_REF
5805 && ix86_cmodel == CM_SMALL_PIC
5806 && SYMBOL_REF_LOCAL_P (op0))
5807 || GET_CODE (op0) == LABEL_REF)
5808 && GET_CODE (op1) == CONST_INT
5809 && INTVAL (op1) < 16*1024*1024
5810 && INTVAL (op1) >= -16*1024*1024)
5814 if (GET_CODE (disp) != CONST)
5816 disp = XEXP (disp, 0);
5820 /* We are unsafe to allow PLUS expressions. This limit allowed distance
5821 of GOT tables. We should not need these anyway. */
5822 if (GET_CODE (disp) != UNSPEC
5823 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5826 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5827 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5833 if (GET_CODE (disp) == PLUS)
5835 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5837 disp = XEXP (disp, 0);
5841 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5842 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5844 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5845 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5846 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5848 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5849 if (! strcmp (sym_name, "<pic base>"))
5854 if (GET_CODE (disp) != UNSPEC)
5857 switch (XINT (disp, 1))
5862 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5864 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
5865 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
5866 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5868 case UNSPEC_GOTTPOFF:
5869 case UNSPEC_GOTNTPOFF:
5870 case UNSPEC_INDNTPOFF:
5873 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5875 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5877 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5883 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5884 memory address for an instruction. The MODE argument is the machine mode
5885 for the MEM expression that wants to use this address.
5887 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
5888 convert common non-canonical forms to canonical form so that they will
/* Decompose ADDR into base/index/disp/scale and validate each part.
   REASON / REASON_RTX record why a candidate was rejected, for the
   TARGET_DEBUG_ADDR diagnostic dump emitted on failure.  */
5892 legitimate_address_p (enum machine_mode mode, register rtx addr, int strict)
5894 struct ix86_address parts;
5895 rtx base, index, disp;
5896 HOST_WIDE_INT scale;
5897 const char *reason = NULL;
5898 rtx reason_rtx = NULL_RTX;
5900 if (TARGET_DEBUG_ADDR)
5903 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5904 GET_MODE_NAME (mode), strict);
5908 if (ix86_decompose_address (addr, &parts) <= 0)
5910 reason = "decomposition failed";
5915 index = parts.index;
5917 scale = parts.scale;
5919 /* Validate base register.
5921 Don't allow SUBREG's here, it can lead to spill failures when the base
5922 is one word out of a two word structure, which is represented internally
5930 if (GET_CODE (base) == SUBREG)
5931 reg = SUBREG_REG (base);
5935 if (GET_CODE (reg) != REG)
5937 reason = "base is not a register";
5941 if (GET_MODE (base) != Pmode)
5943 reason = "base is not in Pmode";
/* STRICT selects hard-register checking (after reload); the non-strict
   variant also accepts pseudos.  */
5947 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5948 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5950 reason = "base is not valid";
5955 /* Validate index register.
5957 Don't allow SUBREG's here, it can lead to spill failures when the index
5958 is one word out of a two word structure, which is represented internally
5966 if (GET_CODE (index) == SUBREG)
5967 reg = SUBREG_REG (index);
5971 if (GET_CODE (reg) != REG)
5973 reason = "index is not a register";
5977 if (GET_MODE (index) != Pmode)
5979 reason = "index is not in Pmode";
5983 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5984 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5986 reason = "index is not valid";
5991 /* Validate scale factor. */
5994 reason_rtx = GEN_INT (scale);
5997 reason = "scale without index";
/* Beyond 1, the x86 addressing modes only allow multipliers 2, 4 and 8.  */
6001 if (scale != 2 && scale != 4 && scale != 8)
6003 reason = "scale is not a valid multiplier";
6008 /* Validate displacement. */
6013 if (GET_CODE (disp) == CONST
6014 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6015 switch (XINT (XEXP (disp, 0), 1))
6019 case UNSPEC_GOTPCREL:
6022 goto is_legitimate_pic;
6024 case UNSPEC_GOTTPOFF:
6025 case UNSPEC_GOTNTPOFF:
6026 case UNSPEC_INDNTPOFF:
6032 reason = "invalid address unspec";
6036 else if (flag_pic && (SYMBOLIC_CONST (disp)
6038 && !machopic_operand_p (disp)
6043 if (TARGET_64BIT && (index || base))
6045 /* foo@dtpoff(%rX) is ok. */
6046 if (GET_CODE (disp) != CONST
6047 || GET_CODE (XEXP (disp, 0)) != PLUS
6048 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6049 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6050 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6051 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6053 reason = "non-constant pic memory reference";
6057 else if (! legitimate_pic_address_disp_p (disp))
6059 reason = "displacement is an invalid pic construct";
6063 /* This code used to verify that a symbolic pic displacement
6064 includes the pic_offset_table_rtx register.
6066 While this is good idea, unfortunately these constructs may
6067 be created by "adds using lea" optimization for incorrect
6076 This code is nonsensical, but results in addressing
6077 GOT table with pic_offset_table_rtx base. We can't
6078 just refuse it easily, since it gets matched by
6079 "addsi3" pattern, that later gets split to lea in the
6080 case output register differs from input. While this
6081 can be handled by separate addsi pattern for this case
6082 that never results in lea, this seems to be easier and
6083 correct fix for crash to disable this test. */
6085 else if (GET_CODE (disp) != LABEL_REF
6086 && GET_CODE (disp) != CONST_INT
6087 && (GET_CODE (disp) != CONST
6088 || !legitimate_constant_p (disp))
6089 && (GET_CODE (disp) != SYMBOL_REF
6090 || !legitimate_constant_p (disp)))
6092 reason = "displacement is not constant";
/* x86-64 addresses carry only 32-bit sign-extended displacements.  */
6095 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
6097 reason = "displacement is out of range";
6102 /* Everything looks valid. */
6103 if (TARGET_DEBUG_ADDR)
6104 fprintf (stderr, "Success.\n");
6108 if (TARGET_DEBUG_ADDR)
6110 fprintf (stderr, "Error: %s\n", reason);
6111 debug_rtx (reason_rtx);
6116 /* Return an unique alias set for the GOT. */
6118 static HOST_WIDE_INT
6119 ix86_GOT_alias_set (void)
/* -1 marks "not yet allocated"; the alias set is created lazily.
   NOTE(review): presumably guarded so new_alias_set runs only on the
   first call -- confirm against the full function body.  */
6121 static HOST_WIDE_INT set = -1;
6123 set = new_alias_set ();
6127 /* Return a legitimate reference for ORIG (an address) using the
6128 register REG. If REG is 0, a new pseudo is generated.
6130 There are two types of references that must be handled:
6132 1. Global data references must load the address from the GOT, via
6133 the PIC reg. An insn is emitted to do this load, and the reg is
6136 2. Static data references, constant pool addresses, and code labels
6137 compute the address as an offset from the GOT, whose base is in
6138 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6139 differentiate them from global data objects. The returned
6140 address is the PIC reg + an unspec constant.
6142 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6143 reg also appears in the address. */
6146 legitimize_pic_address (rtx orig, rtx reg)
6154 reg = gen_reg_rtx (Pmode);
6155 /* Use the generic Mach-O PIC machinery. */
6156 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6159 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6161 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6163 /* This symbol may be referenced via a displacement from the PIC
6164 base address (@GOTOFF). */
/* During reload we cannot create new uses normally; note the PIC
   register as ever-live by hand.  */
6166 if (reload_in_progress)
6167 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6168 if (GET_CODE (addr) == CONST)
6169 addr = XEXP (addr, 0);
6170 if (GET_CODE (addr) == PLUS)
6172 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6173 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6176 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6177 new = gen_rtx_CONST (Pmode, new);
6178 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6182 emit_move_insn (reg, new);
6186 else if (GET_CODE (addr) == SYMBOL_REF)
/* x86-64: load the symbol's address PC-relatively through the GOT.  */
6190 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6191 new = gen_rtx_CONST (Pmode, new);
6192 new = gen_rtx_MEM (Pmode, new);
6193 RTX_UNCHANGING_P (new) = 1;
6194 set_mem_alias_set (new, ix86_GOT_alias_set ());
6197 reg = gen_reg_rtx (Pmode);
6198 /* Use directly gen_movsi, otherwise the address is loaded
6199 into register for CSE. We don't want to CSE this addresses,
6200 instead we CSE addresses from the GOT table, so skip this. */
6201 emit_insn (gen_movsi (reg, new));
6206 /* This symbol must be referenced via a load from the
6207 Global Offset Table (@GOT). */
6209 if (reload_in_progress)
6210 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6211 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6212 new = gen_rtx_CONST (Pmode, new);
6213 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6214 new = gen_rtx_MEM (Pmode, new);
6215 RTX_UNCHANGING_P (new) = 1;
6216 set_mem_alias_set (new, ix86_GOT_alias_set ());
6219 reg = gen_reg_rtx (Pmode);
6220 emit_move_insn (reg, new);
6226 if (GET_CODE (addr) == CONST)
6228 addr = XEXP (addr, 0);
6230 /* We must match stuff we generate before. Assume the only
6231 unspecs that can get here are ours. Not that we could do
6232 anything with them anyway... */
6233 if (GET_CODE (addr) == UNSPEC
6234 || (GET_CODE (addr) == PLUS
6235 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6237 if (GET_CODE (addr) != PLUS)
6240 if (GET_CODE (addr) == PLUS)
6242 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6244 /* Check first to see if this is a constant offset from a @GOTOFF
6245 symbol reference. */
6246 if (local_symbolic_operand (op0, Pmode)
6247 && GET_CODE (op1) == CONST_INT)
6251 if (reload_in_progress)
6252 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6253 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6255 new = gen_rtx_PLUS (Pmode, new, op1);
6256 new = gen_rtx_CONST (Pmode, new);
6257 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6261 emit_move_insn (reg, new);
/* Offsets outside +-16MB are forced into a register instead of being
   folded into the symbolic displacement.  */
6267 if (INTVAL (op1) < -16*1024*1024
6268 || INTVAL (op1) >= 16*1024*1024)
6269 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
/* General PLUS: legitimize both halves, then re-associate so any
   constant term ends up outermost.  */
6274 base = legitimize_pic_address (XEXP (addr, 0), reg);
6275 new = legitimize_pic_address (XEXP (addr, 1),
6276 base == reg ? NULL_RTX : reg);
6278 if (GET_CODE (new) == CONST_INT)
6279 new = plus_constant (base, INTVAL (new));
6282 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6284 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6285 new = XEXP (new, 1);
6287 new = gen_rtx_PLUS (Pmode, base, new);
6295 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6298 get_thread_pointer (int to_reg)
/* The thread pointer is represented as (unspec [(const_int 0)] UNSPEC_TP).  */
6302 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
/* When forcing into a register, emit an explicit SET into a fresh pseudo.  */
6306 reg = gen_reg_rtx (Pmode);
6307 insn = gen_rtx_SET (VOIDmode, reg, tp);
6308 insn = emit_insn (insn);
6313 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6314 false if we expect this to be used for a memory address and true if
6315 we expect to load the address into a register. */
6318 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6320 rtx dest, base, off, pic;
/* Global dynamic: the most general model; the address is obtained via
   a call (tls_get_addr-style), wrapped in a libcall block for CSE.  */
6325 case TLS_MODEL_GLOBAL_DYNAMIC:
6326 dest = gen_reg_rtx (Pmode);
6329 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6332 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6333 insns = get_insns ();
6336 emit_libcall_block (insns, dest, rax, x);
6339 emit_insn (gen_tls_global_dynamic_32 (dest, x));
/* Local dynamic: one call gets the module base; individual symbols are
   then addressed as base + @DTPOFF constant.  */
6342 case TLS_MODEL_LOCAL_DYNAMIC:
6343 base = gen_reg_rtx (Pmode);
6346 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6349 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6350 insns = get_insns ();
6353 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6354 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6355 emit_libcall_block (insns, base, rax, note);
6358 emit_insn (gen_tls_local_dynamic_base_32 (base));
6360 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6361 off = gen_rtx_CONST (Pmode, off);
6363 return gen_rtx_PLUS (Pmode, base, off);
/* Initial exec: the TP offset is loaded from the GOT; the unspec type
   depends on bitness, PIC mode and TARGET_GNU_TLS.  */
6365 case TLS_MODEL_INITIAL_EXEC:
6369 type = UNSPEC_GOTNTPOFF;
6373 if (reload_in_progress)
6374 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6375 pic = pic_offset_table_rtx;
6376 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6378 else if (!TARGET_GNU_TLS)
6380 pic = gen_reg_rtx (Pmode);
6381 emit_insn (gen_set_got (pic));
6382 type = UNSPEC_GOTTPOFF;
6387 type = UNSPEC_INDNTPOFF;
6390 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6391 off = gen_rtx_CONST (Pmode, off);
6393 off = gen_rtx_PLUS (Pmode, pic, off);
6394 off = gen_rtx_MEM (Pmode, off);
6395 RTX_UNCHANGING_P (off) = 1;
6396 set_mem_alias_set (off, ix86_GOT_alias_set ());
6398 if (TARGET_64BIT || TARGET_GNU_TLS)
6400 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6401 off = force_reg (Pmode, off);
6402 return gen_rtx_PLUS (Pmode, base, off);
/* Non-GNU TLS: the offset is subtracted from the thread pointer.  */
6406 base = get_thread_pointer (true);
6407 dest = gen_reg_rtx (Pmode);
6408 emit_insn (gen_subsi3 (dest, base, off));
/* Local exec: offset is a link-time constant relative to the TP.  */
6412 case TLS_MODEL_LOCAL_EXEC:
6413 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6414 (TARGET_64BIT || TARGET_GNU_TLS)
6415 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6416 off = gen_rtx_CONST (Pmode, off);
6418 if (TARGET_64BIT || TARGET_GNU_TLS)
6420 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6421 return gen_rtx_PLUS (Pmode, base, off);
6425 base = get_thread_pointer (true);
6426 dest = gen_reg_rtx (Pmode);
6427 emit_insn (gen_subsi3 (dest, base, off));
6438 /* Try machine-dependent ways of modifying an illegitimate address
6439 to be legitimate. If we find one, return the new, valid address.
6440 This macro is used in only one place: `memory_address' in explow.c.
6442 OLDX is the address as it was before break_out_memory_refs was called.
6443 In some cases it is useful to look at this to decide what needs to be done.
6445 MODE and WIN are passed so that this macro can use
6446 GO_IF_LEGITIMATE_ADDRESS.
6448 It is always safe for this macro to do nothing. It exists to recognize
6449 opportunities to optimize the output.
6451 For the 80386, we handle X+REG by loading X into a register R and
6452 using R+REG. R will go in a general reg and indexing will be used.
6453 However, if REG is a broken-out memory address or multiplication,
6454 nothing needs to be done because REG can certainly go in a general reg.
6456 When -fpic is used, special handling is needed for symbolic references.
6457 See comments by legitimize_pic_address in i386.c for details. */
6460 legitimize_address (register rtx x, register rtx oldx ATTRIBUTE_UNUSED,
6461 enum machine_mode mode)
6466 if (TARGET_DEBUG_ADDR)
6468 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6469 GET_MODE_NAME (mode));
/* TLS symbols and PIC symbolic constants get their own legitimizers.  */
6473 log = tls_symbolic_operand (x, mode);
6475 return legitimize_tls_address (x, log, false);
6477 if (flag_pic && SYMBOLIC_CONST (x))
6478 return legitimize_pic_address (x, 0);
6480 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6481 if (GET_CODE (x) == ASHIFT
6482 && GET_CODE (XEXP (x, 1)) == CONST_INT
6483 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6486 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6487 GEN_INT (1 << log))
6490 if (GET_CODE (x) == PLUS)
6492 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6494 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6495 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6496 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6499 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6500 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6501 GEN_INT (1 << log));
6504 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6505 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6506 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6509 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6510 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6511 GEN_INT (1 << log));
6514 /* Put multiply first if it isn't already. */
6515 if (GET_CODE (XEXP (x, 1)) == MULT)
6517 rtx tmp = XEXP (x, 0);
6518 XEXP (x, 0) = XEXP (x, 1);
6523 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6524 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6525 created by virtual register instantiation, register elimination, and
6526 similar optimizations. */
6527 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6530 x = gen_rtx_PLUS (Pmode,
6531 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6532 XEXP (XEXP (x, 1), 0)),
6533 XEXP (XEXP (x, 1), 1));
6537 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6538 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6539 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6540 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6541 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6542 && CONSTANT_P (XEXP (x, 1)))
6545 rtx other = NULL_RTX;
6547 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6549 constant = XEXP (x, 1);
6550 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6552 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6554 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6555 other = XEXP (x, 1);
6563 x = gen_rtx_PLUS (Pmode,
6564 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6565 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6566 plus_constant (other, INTVAL (constant)));
/* After each round of canonicalization, stop early if the address is
   already acceptable (non-strict check).  */
6570 if (changed && legitimate_address_p (mode, x, FALSE))
6573 if (GET_CODE (XEXP (x, 0)) == MULT)
6576 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6579 if (GET_CODE (XEXP (x, 1)) == MULT)
6582 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6586 && GET_CODE (XEXP (x, 1)) == REG
6587 && GET_CODE (XEXP (x, 0)) == REG)
6590 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6593 x = legitimize_pic_address (x, 0);
6596 if (changed && legitimate_address_p (mode, x, FALSE))
/* Last resort: force the non-register half into a fresh pseudo.  */
6599 if (GET_CODE (XEXP (x, 0)) == REG)
6601 register rtx temp = gen_reg_rtx (Pmode);
6602 register rtx val = force_operand (XEXP (x, 1), temp);
6604 emit_move_insn (temp, val);
6610 else if (GET_CODE (XEXP (x, 1)) == REG)
6612 register rtx temp = gen_reg_rtx (Pmode);
6613 register rtx val = force_operand (XEXP (x, 0), temp);
6615 emit_move_insn (temp, val);
6625 /* Print an integer constant expression in assembler syntax. Addition
6626 and subtraction are the only arithmetic that may appear in these
6627 expressions. FILE is the stdio stream to write to, X is the rtx, and
6628 CODE is the operand print code from the output string. */
6631 output_pic_addr_const (FILE *file, rtx x, int code)
6635 switch (GET_CODE (x))
/* SYMBOL_REF: emit the name; non-local symbols under %P get @PLT.  */
6645 assemble_name (file, XSTR (x, 0));
6646 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
6647 fputs ("@PLT", file);
6654 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6655 assemble_name (asm_out_file, buf);
6659 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6663 /* This used to output parentheses around the expression,
6664 but that does not work on the 386 (either ATT or BSD assembler). */
6665 output_pic_addr_const (file, XEXP (x, 0), code);
6669 if (GET_MODE (x) == VOIDmode)
6671 /* We can use %d if the number is <32 bits and positive. */
6672 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6673 fprintf (file, "0x%lx%08lx",
6674 (unsigned long) CONST_DOUBLE_HIGH (x),
6675 (unsigned long) CONST_DOUBLE_LOW (x));
6677 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6680 /* We can't handle floating point constants;
6681 PRINT_OPERAND must handle them. */
6682 output_operand_lossage ("floating constant misused");
6686 /* Some assemblers need integer constants to appear first. */
6687 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6689 output_pic_addr_const (file, XEXP (x, 0), code);
6691 output_pic_addr_const (file, XEXP (x, 1), code);
6693 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6695 output_pic_addr_const (file, XEXP (x, 1), code);
6697 output_pic_addr_const (file, XEXP (x, 0), code);
/* MINUS: bracketing differs between Intel and AT&T dialects.  */
6705 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6706 output_pic_addr_const (file, XEXP (x, 0), code);
6708 output_pic_addr_const (file, XEXP (x, 1), code);
6710 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
/* UNSPEC: print the wrapped operand followed by its relocation suffix.  */
6714 if (XVECLEN (x, 0) != 1)
6716 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6717 switch (XINT (x, 1))
6720 fputs ("@GOT", file);
6723 fputs ("@GOTOFF", file);
6725 case UNSPEC_GOTPCREL:
6726 fputs ("@GOTPCREL(%rip)", file);
6728 case UNSPEC_GOTTPOFF:
6729 /* FIXME: This might be @TPOFF in Sun ld too. */
6730 fputs ("@GOTTPOFF", file);
6733 fputs ("@TPOFF", file);
6737 fputs ("@TPOFF", file);
6739 fputs ("@NTPOFF", file);
6742 fputs ("@DTPOFF", file);
6744 case UNSPEC_GOTNTPOFF:
6746 fputs ("@GOTTPOFF(%rip)", file);
6748 fputs ("@GOTNTPOFF", file);
6750 case UNSPEC_INDNTPOFF:
6751 fputs ("@INDNTPOFF", file);
6754 output_operand_lossage ("invalid UNSPEC as operand");
6760 output_operand_lossage ("invalid expression as operand");
6764 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6765 We need to handle our special PIC relocations. */
6768 i386_dwarf_output_addr_const (FILE *file, rtx x)
/* Emit a pointer-sized data directive, then the constant.  PIC
   constants go through output_pic_addr_const so relocation suffixes
   are printed; NOTE(review): presumably selected by flag_pic -- the
   guard condition is not visible here.  */
6771 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6775 fprintf (file, "%s", ASM_LONG);
6778 output_pic_addr_const (file, x, '\0');
6780 output_addr_const (file, x);
6784 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6785 We need to emit DTP-relative relocations. */
6788 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
/* Emit X as a DTP-relative (module-offset) relocation for debug info.  */
6790 fputs (ASM_LONG, file);
6791 output_addr_const (file, x);
6792 fputs ("@DTPOFF", file);
/* Pad the upper half with zero; NOTE(review): presumably the SIZE == 8
   case -- the switch is not visible here.  */
6798 fputs (", 0", file);
6805 /* In the name of slightly smaller debug output, and to cater to
6806 general assembler losage, recognize PIC+GOTOFF and turn it back
6807 into a direct symbol reference. */
6810 ix86_delegitimize_address (rtx orig_x)
6814 if (GET_CODE (x) == MEM)
/* x86-64 pattern: (mem (const (unspec [sym] GOTPCREL))) -> sym.  */
6819 if (GET_CODE (x) != CONST
6820 || GET_CODE (XEXP (x, 0)) != UNSPEC
6821 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6822 || GET_CODE (orig_x) != MEM)
6824 return XVECEXP (XEXP (x, 0), 0, 0);
6827 if (GET_CODE (x) != PLUS
6828 || GET_CODE (XEXP (x, 1)) != CONST)
/* Identify the PIC-register term; Y collects whatever else (an index
   term) should survive into the delegitimized address.  */
6831 if (GET_CODE (XEXP (x, 0)) == REG
6832 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6833 /* %ebx + GOT/GOTOFF */
6835 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6837 /* %ebx + %reg * scale + GOT/GOTOFF */
6839 if (GET_CODE (XEXP (y, 0)) == REG
6840 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6842 else if (GET_CODE (XEXP (y, 1)) == REG
6843 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6847 if (GET_CODE (y) != REG
6848 && GET_CODE (y) != MULT
6849 && GET_CODE (y) != ASHIFT)
6855 x = XEXP (XEXP (x, 1), 0);
/* @GOT references only make sense inside a MEM; @GOTOFF only outside.  */
6856 if (GET_CODE (x) == UNSPEC
6857 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6858 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6861 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6862 return XVECEXP (x, 0, 0);
6865 if (GET_CODE (x) == PLUS
6866 && GET_CODE (XEXP (x, 0)) == UNSPEC
6867 && GET_CODE (XEXP (x, 1)) == CONST_INT
6868 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6869 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6870 && GET_CODE (orig_x) != MEM)))
6872 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6874 return gen_rtx_PLUS (Pmode, y, x);
/* Write the assembler condition suffix (e.g. "a", "e", "np") for CODE
   compared in MODE.  REVERSE inverts the condition.  NOTE(review): the
   trailing parameters (an fp flag and the output FILE, per the fputs at
   the end) are elided from this view -- confirm against the full
   signature.  */
6882 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
/* FP compare modes: reduce to an integer condition first; compound
   FP comparisons (needing a bypass/second jump) cannot be expressed
   as a single suffix.  */
6887 if (mode == CCFPmode || mode == CCFPUmode)
6889 enum rtx_code second_code, bypass_code;
6890 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6891 if (bypass_code != NIL || second_code != NIL)
6893 code = ix86_fp_compare_code_to_integer (code);
6897 code = reverse_condition (code);
6908 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6913 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
6914 Those same assemblers have the same but opposite losage on cmov. */
6917 suffix = fp ? "nbe" : "a";
6920 if (mode == CCNOmode || mode == CCGOCmode)
6922 else if (mode == CCmode || mode == CCGCmode)
6933 if (mode == CCNOmode || mode == CCGOCmode)
6935 else if (mode == CCmode || mode == CCGCmode)
6944 suffix = fp ? "nb" : "ae";
6947 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6957 suffix = fp ? "u" : "p";
6960 suffix = fp ? "nu" : "np";
6965 fputs (suffix, file);
/* Print the assembler name of register X to FILE.  CODE is a size/kind
   override ('b', 'w', 'k', 'q', 'y', 'h'); otherwise the size is taken
   from the register's mode.  */
6969 print_reg (rtx x, int code, FILE *file)
/* Soft registers (arg/frame pointer, flags, fpsr) never reach real
   assembler output.  */
6971 if ((REGNO (x) == ARG_POINTER_REGNUM
6972 || REGNO (x) == FRAME_POINTER_REGNUM
6973 || REGNO (x) == FLAGS_REG
6974 || REGNO (x) == FPSR_REG)
6975 && file == asm_out_file)
6978 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
/* Translate the override code into a byte size; fall back to the
   operand's mode size.  */
6981 if (code == 'w' || MMX_REG_P (x))
6983 else if (code == 'b')
6985 else if (code == 'k')
6987 else if (code == 'q')
6989 else if (code == 'y')
6991 else if (code == 'h')
6994 code = GET_MODE_SIZE (GET_MODE (x));
6996 /* Irritatingly, AMD extended registers use different naming convention
6997 from the normal registers. */
6998 if (REX_INT_REG_P (x))
7005 error ("extended registers have no high halves");
7008 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7011 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7014 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7017 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7020 error ("unsupported operand size for extended register");
7028 if (STACK_TOP_P (x))
7030 fputs ("st(0)", file);
/* Integer registers get an 'e' (or 'r' in 64-bit) size prefix.  */
7037 if (! ANY_FP_REG_P (x))
7038 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7042 fputs (hi_reg_name[REGNO (x)], file);
7045 fputs (qi_reg_name[REGNO (x)], file);
7048 fputs (qi_high_reg_name[REGNO (x)], file);
7055 /* Locate some local-dynamic symbol still in use by this function
7056 so that we can print its name in some tls_local_dynamic_base
7060 get_some_local_dynamic_name (void)
/* Cache the answer per function in cfun->machine->some_ld_name.  */
7064 if (cfun->machine->some_ld_name)
7065 return cfun->machine->some_ld_name;
/* Walk every insn pattern; the for_each_rtx callback stores the first
   local-dynamic symbol name it finds into cfun->machine->some_ld_name.  */
7067 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7069 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7070 return cfun->machine->some_ld_name;
/* for_each_rtx callback for get_some_local_dynamic_name: record the
   name of the first local-dynamic TLS SYMBOL_REF seen.  */
7076 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7080 if (GET_CODE (x) == SYMBOL_REF
7081 && local_dynamic_symbolic_operand (x, Pmode))
7083 cfun->machine->some_ld_name = XSTR (x, 0);
7091 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7092 C -- print opcode suffix for set/cmov insn.
7093 c -- like C, but print reversed condition
7094 F,f -- likewise, but for floating-point.
7095 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7097 R -- print the prefix for register names.
7098 z -- print the opcode suffix for the size of the current operand.
7099 * -- print a star (in certain assembler syntax)
7100 A -- print an absolute memory reference.
7101 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7102 s -- print a shift double count, followed by the assemblers argument
7104 b -- print the QImode name of the register for the indicated operand.
7105 %b0 would print %al if operands[0] is reg 0.
7106 w -- likewise, print the HImode name of the register.
7107 k -- likewise, print the SImode name of the register.
7108 q -- likewise, print the DImode name of the register.
7109 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7110 y -- print "st(0)" instead of "st" as a register.
7111 D -- print condition for SSE cmp instruction.
7112 P -- if PIC, print an @PLT suffix.
7113 X -- don't print any sort of PIC '@' suffix for a symbol.
7114 & -- print some in-use local-dynamic symbol name.
7118 print_operand (FILE *file, rtx x, int code)
/* '&' -- name of some in-use local-dynamic TLS symbol.  */
7125 if (ASSEMBLER_DIALECT == ASM_ATT)
7130 assemble_name (file, get_some_local_dynamic_name ());
/* 'A' -- absolute memory reference ('*' in AT&T syntax).  */
7134 if (ASSEMBLER_DIALECT == ASM_ATT)
7136 else if (ASSEMBLER_DIALECT == ASM_INTEL)
7138 /* Intel syntax. For absolute addresses, registers should not
7139 be surrounded by braces. */
7140 if (GET_CODE (x) != REG)
7143 PRINT_OPERAND (file, x, 0);
7151 PRINT_OPERAND (file, x, 0);
/* 'L'/'W'/'B'/'Q'/'S'/'T' -- explicit size suffixes, AT&T only.  */
7156 if (ASSEMBLER_DIALECT == ASM_ATT)
7161 if (ASSEMBLER_DIALECT == ASM_ATT)
7166 if (ASSEMBLER_DIALECT == ASM_ATT)
7171 if (ASSEMBLER_DIALECT == ASM_ATT)
7176 if (ASSEMBLER_DIALECT == ASM_ATT)
7181 if (ASSEMBLER_DIALECT == ASM_ATT)
/* 'z' -- suffix derived from the operand's mode size.  */
7186 /* 387 opcodes don't get size suffixes if the operands are
7188 if (STACK_REG_P (x))
7191 /* Likewise if using Intel opcodes. */
7192 if (ASSEMBLER_DIALECT == ASM_INTEL)
7195 /* This is the size of op from size of operand. */
7196 switch (GET_MODE_SIZE (GET_MODE (x)))
7199 #ifdef HAVE_GAS_FILDS_FISTS
7205 if (GET_MODE (x) == SFmode)
7220 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7222 #ifdef GAS_MNEMONICS
/* 's' -- shift-double count operand.  */
7248 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7250 PRINT_OPERAND (file, x, 0);
/* 'D' -- SSE comparison predicate name.  */
7256 /* Little bit of braindamage here. The SSE compare instructions
7257 does use completely different names for the comparisons that the
7258 fp conditional moves. */
7259 switch (GET_CODE (x))
7274 fputs ("unord", file);
7278 fputs ("neq", file);
7282 fputs ("nlt", file);
7286 fputs ("nle", file);
7289 fputs ("ord", file);
/* 'O' -- Sun-assembler cmov size suffix (w./l./q.), AT&T only.  */
7297 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7298 if (ASSEMBLER_DIALECT == ASM_ATT)
7300 switch (GET_MODE (x))
7302 case HImode: putc ('w', file); break;
7304 case SFmode: putc ('l', file); break;
7306 case DFmode: putc ('q', file); break;
/* 'C'/'F' -- set/cmov condition suffix (F = floating-point form).  */
7314 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7317 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7318 if (ASSEMBLER_DIALECT == ASM_ATT)
7321 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7324 /* Like above, but reverse condition */
7326 /* Check to see if argument to %c is really a constant
7327 and not a condition code which needs to be reversed. */
7328 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
7330 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7333 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7336 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7337 if (ASSEMBLER_DIALECT == ASM_ATT)
7340 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
/* '+' -- branch-prediction prefix from the insn's REG_BR_PROB note.  */
7346 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7349 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7352 int pred_val = INTVAL (XEXP (x, 0));
/* Only hint when the prediction is decisive (outside 45%..55%).  */
7354 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7355 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7357 int taken = pred_val > REG_BR_PROB_BASE / 2;
7358 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7360 /* Emit hints only in the case default branch prediction
7361 heuristics would fail. */
7362 if (taken != cputaken)
7364 /* We use 3e (DS) prefix for taken branches and
7365 2e (CS) prefix for not taken branches. */
7367 fputs ("ds ; ", file);
7369 fputs ("cs ; ", file);
7376 output_operand_lossage ("invalid operand code `%c'", code);
/* No (or handled) code letter: print the operand by its RTX class.  */
7380 if (GET_CODE (x) == REG)
7382 PRINT_REG (x, code, file);
7385 else if (GET_CODE (x) == MEM)
7387 /* No `byte ptr' prefix for call instructions. */
7388 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7391 switch (GET_MODE_SIZE (GET_MODE (x)))
7393 case 1: size = "BYTE"; break;
7394 case 2: size = "WORD"; break;
7395 case 4: size = "DWORD"; break;
7396 case 8: size = "QWORD"; break;
7397 case 12: size = "XWORD"; break;
7398 case 16: size = "XMMWORD"; break;
7403 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7406 else if (code == 'w')
7408 else if (code == 'k')
7412 fputs (" PTR ", file);
7416 /* Avoid (%rip) for call operands. */
7417 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7418 && GET_CODE (x) != CONST_INT)
7419 output_addr_const (file, x);
7420 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7421 output_operand_lossage ("invalid constraints for operand");
/* SFmode immediates are emitted as their 32-bit target bit pattern.  */
7426 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7431 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7432 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7434 if (ASSEMBLER_DIALECT == ASM_ATT)
7436 fprintf (file, "0x%lx", l);
7439 /* These float cases don't actually occur as immediate operands. */
7440 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7444 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7445 fprintf (file, "%s", dstr);
7448 else if (GET_CODE (x) == CONST_DOUBLE
7449 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
7453 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7454 fprintf (file, "%s", dstr);
/* Remaining constants: '$' / OFFSET FLAT: markers per dialect, then the
   constant itself (via the PIC printer when relocations may apply).  */
7461 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7463 if (ASSEMBLER_DIALECT == ASM_ATT)
7466 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7467 || GET_CODE (x) == LABEL_REF)
7469 if (ASSEMBLER_DIALECT == ASM_ATT)
7472 fputs ("OFFSET FLAT:", file);
7475 if (GET_CODE (x) == CONST_INT)
7476 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7478 output_pic_addr_const (file, x, code);
7480 output_addr_const (file, x);
7484 /* Print a memory operand whose address is ADDR. */
7487 print_operand_address (FILE *file, register rtx addr)
7489 struct ix86_address parts;
7490 rtx base, index, disp;
/* Decompose the address; it must already be legitimate.  */
7493 if (! ix86_decompose_address (addr, &parts))
7497 index = parts.index;
7499 scale = parts.scale;
/* Segment override (fs:/gs:) comes first.  */
7507 if (USER_LABEL_PREFIX[0] == 0)
7509 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
7515 if (!base && !index)
7517 /* Displacement only requires special attention. */
7519 if (GET_CODE (disp) == CONST_INT)
7521 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
7523 if (USER_LABEL_PREFIX[0] == 0)
7525 fputs ("ds:", file);
7527 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
7530 output_pic_addr_const (file, disp, 0);
7532 output_addr_const (file, disp);
7534 /* Use one byte shorter RIP relative addressing for 64bit mode. */
7536 && ((GET_CODE (disp) == SYMBOL_REF
7537 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
7538 || GET_CODE (disp) == LABEL_REF
7539 || (GET_CODE (disp) == CONST
7540 && GET_CODE (XEXP (disp, 0)) == PLUS
7541 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
7542 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
7543 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
7544 fputs ("(%rip)", file);
/* AT&T syntax: disp(base,index,scale).  */
7548 if (ASSEMBLER_DIALECT == ASM_ATT)
7553 output_pic_addr_const (file, disp, 0);
7554 else if (GET_CODE (disp) == LABEL_REF)
7555 output_asm_label (disp);
7557 output_addr_const (file, disp);
7562 PRINT_REG (base, 0, file);
7566 PRINT_REG (index, 0, file);
7568 fprintf (file, ",%d", scale);
/* Intel syntax: [base + index*scale + disp].  */
7574 rtx offset = NULL_RTX;
7578 /* Pull out the offset of a symbol; print any symbol itself. */
7579 if (GET_CODE (disp) == CONST
7580 && GET_CODE (XEXP (disp, 0)) == PLUS
7581 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7583 offset = XEXP (XEXP (disp, 0), 1);
7584 disp = gen_rtx_CONST (VOIDmode,
7585 XEXP (XEXP (disp, 0), 0));
7589 output_pic_addr_const (file, disp, 0);
7590 else if (GET_CODE (disp) == LABEL_REF)
7591 output_asm_label (disp);
7592 else if (GET_CODE (disp) == CONST_INT)
7595 output_addr_const (file, disp);
7601 PRINT_REG (base, 0, file);
7604 if (INTVAL (offset) >= 0)
7606 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7610 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7617 PRINT_REG (index, 0, file);
7619 fprintf (file, "*%d", scale);
7627 output_addr_const_extra (FILE *file, rtx x)
7631 if (GET_CODE (x) != UNSPEC)
7634 op = XVECEXP (x, 0, 0);
7635 switch (XINT (x, 1))
7637 case UNSPEC_GOTTPOFF:
7638 output_addr_const (file, op);
7639 /* FIXME: This might be @TPOFF in Sun ld. */
7640 fputs ("@GOTTPOFF", file);
7643 output_addr_const (file, op);
7644 fputs ("@TPOFF", file);
7647 output_addr_const (file, op);
7649 fputs ("@TPOFF", file);
7651 fputs ("@NTPOFF", file);
7654 output_addr_const (file, op);
7655 fputs ("@DTPOFF", file);
7657 case UNSPEC_GOTNTPOFF:
7658 output_addr_const (file, op);
7660 fputs ("@GOTTPOFF(%rip)", file);
7662 fputs ("@GOTNTPOFF", file);
7664 case UNSPEC_INDNTPOFF:
7665 output_addr_const (file, op);
7666 fputs ("@INDNTPOFF", file);
7676 /* Split one or more DImode RTL references into pairs of SImode
7677 references. The RTL can be REG, offsettable MEM, integer constant, or
7678 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7679 split and "num" is its length. lo_half and hi_half are output arrays
7680 that parallel "operands". */
7683 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7687 rtx op = operands[num];
7689 /* simplify_subreg refuse to split volatile memory addresses,
7690 but we still have to handle it. */
7691 if (GET_CODE (op) == MEM)
7693 lo_half[num] = adjust_address (op, SImode, 0);
7694 hi_half[num] = adjust_address (op, SImode, 4);
7698 lo_half[num] = simplify_gen_subreg (SImode, op,
7699 GET_MODE (op) == VOIDmode
7700 ? DImode : GET_MODE (op), 0);
7701 hi_half[num] = simplify_gen_subreg (SImode, op,
7702 GET_MODE (op) == VOIDmode
7703 ? DImode : GET_MODE (op), 4);
7707 /* Split one or more TImode RTL references into pairs of SImode
7708 references. The RTL can be REG, offsettable MEM, integer constant, or
7709 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7710 split and "num" is its length. lo_half and hi_half are output arrays
7711 that parallel "operands". */
7714 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7718 rtx op = operands[num];
7720 /* simplify_subreg refuse to split volatile memory addresses, but we
7721 still have to handle it. */
7722 if (GET_CODE (op) == MEM)
7724 lo_half[num] = adjust_address (op, DImode, 0);
7725 hi_half[num] = adjust_address (op, DImode, 8);
7729 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7730 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7735 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7736 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7737 is the expression of the binary operation. The output may either be
7738 emitted here, or returned to the caller, like all output_* functions.
7740 There is no guarantee that the operands are the same mode, as they
7741 might be within FLOAT or FLOAT_EXTEND expressions. */
7743 #ifndef SYSV386_COMPAT
7744 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7745 wants to fix the assemblers because that causes incompatibility
7746 with gcc. No-one wants to fix gcc because that causes
7747 incompatibility with assemblers... You can use the option of
7748 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7749 #define SYSV386_COMPAT 1
7753 output_387_binary_op (rtx insn, rtx *operands)
7755 static char buf[30];
7758 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7760 #ifdef ENABLE_CHECKING
7761 /* Even if we do not want to check the inputs, this documents input
7762 constraints. Which helps in understanding the following code. */
7763 if (STACK_REG_P (operands[0])
7764 && ((REG_P (operands[1])
7765 && REGNO (operands[0]) == REGNO (operands[1])
7766 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7767 || (REG_P (operands[2])
7768 && REGNO (operands[0]) == REGNO (operands[2])
7769 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7770 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7776 switch (GET_CODE (operands[3]))
7779 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7780 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7788 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7789 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7797 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7798 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7806 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7807 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7821 if (GET_MODE (operands[0]) == SFmode)
7822 strcat (buf, "ss\t{%2, %0|%0, %2}");
7824 strcat (buf, "sd\t{%2, %0|%0, %2}");
7829 switch (GET_CODE (operands[3]))
7833 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7835 rtx temp = operands[2];
7836 operands[2] = operands[1];
7840 /* know operands[0] == operands[1]. */
7842 if (GET_CODE (operands[2]) == MEM)
7848 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7850 if (STACK_TOP_P (operands[0]))
7851 /* How is it that we are storing to a dead operand[2]?
7852 Well, presumably operands[1] is dead too. We can't
7853 store the result to st(0) as st(0) gets popped on this
7854 instruction. Instead store to operands[2] (which I
7855 think has to be st(1)). st(1) will be popped later.
7856 gcc <= 2.8.1 didn't have this check and generated
7857 assembly code that the Unixware assembler rejected. */
7858 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7860 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7864 if (STACK_TOP_P (operands[0]))
7865 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7867 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7872 if (GET_CODE (operands[1]) == MEM)
7878 if (GET_CODE (operands[2]) == MEM)
7884 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7887 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7888 derived assemblers, confusingly reverse the direction of
7889 the operation for fsub{r} and fdiv{r} when the
7890 destination register is not st(0). The Intel assembler
7891 doesn't have this brain damage. Read !SYSV386_COMPAT to
7892 figure out what the hardware really does. */
7893 if (STACK_TOP_P (operands[0]))
7894 p = "{p\t%0, %2|rp\t%2, %0}";
7896 p = "{rp\t%2, %0|p\t%0, %2}";
7898 if (STACK_TOP_P (operands[0]))
7899 /* As above for fmul/fadd, we can't store to st(0). */
7900 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7902 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7907 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7910 if (STACK_TOP_P (operands[0]))
7911 p = "{rp\t%0, %1|p\t%1, %0}";
7913 p = "{p\t%1, %0|rp\t%0, %1}";
7915 if (STACK_TOP_P (operands[0]))
7916 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7918 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7923 if (STACK_TOP_P (operands[0]))
7925 if (STACK_TOP_P (operands[1]))
7926 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7928 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7931 else if (STACK_TOP_P (operands[1]))
7934 p = "{\t%1, %0|r\t%0, %1}";
7936 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7942 p = "{r\t%2, %0|\t%0, %2}";
7944 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7957 /* Output code to initialize control word copies used by
7958 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
7959 is set to control word rounding downwards. */
7961 emit_i387_cw_initialization (rtx normal, rtx round_down)
7963 rtx reg = gen_reg_rtx (HImode);
7965 emit_insn (gen_x86_fnstcw_1 (normal));
7966 emit_move_insn (reg, normal);
7967 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7969 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7971 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7972 emit_move_insn (round_down, reg);
7975 /* Output code for INSN to convert a float to a signed int. OPERANDS
7976 are the insn operands. The output may be [HSD]Imode and the input
7977 operand may be [SDX]Fmode. */
7980 output_fix_trunc (rtx insn, rtx *operands)
7982 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7983 int dimode_p = GET_MODE (operands[0]) == DImode;
7985 /* Jump through a hoop or two for DImode, since the hardware has no
7986 non-popping instruction. We used to do this a different way, but
7987 that was somewhat fragile and broke with post-reload splitters. */
7988 if (dimode_p && !stack_top_dies)
7989 output_asm_insn ("fld\t%y1", operands);
7991 if (!STACK_TOP_P (operands[1]))
7994 if (GET_CODE (operands[0]) != MEM)
7997 output_asm_insn ("fldcw\t%3", operands);
7998 if (stack_top_dies || dimode_p)
7999 output_asm_insn ("fistp%z0\t%0", operands);
8001 output_asm_insn ("fist%z0\t%0", operands);
8002 output_asm_insn ("fldcw\t%2", operands);
8007 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8008 should be used and 2 when fnstsw should be used. UNORDERED_P is true
8009 when fucom should be used. */
8012 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8015 rtx cmp_op0 = operands[0];
8016 rtx cmp_op1 = operands[1];
8017 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
8022 cmp_op1 = operands[2];
8026 if (GET_MODE (operands[0]) == SFmode)
8028 return "ucomiss\t{%1, %0|%0, %1}";
8030 return "comiss\t{%1, %0|%0, %1}";
8033 return "ucomisd\t{%1, %0|%0, %1}";
8035 return "comisd\t{%1, %0|%0, %1}";
8038 if (! STACK_TOP_P (cmp_op0))
8041 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8043 if (STACK_REG_P (cmp_op1)
8045 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8046 && REGNO (cmp_op1) != FIRST_STACK_REG)
8048 /* If both the top of the 387 stack dies, and the other operand
8049 is also a stack register that dies, then this must be a
8050 `fcompp' float compare */
8054 /* There is no double popping fcomi variant. Fortunately,
8055 eflags is immune from the fstp's cc clobbering. */
8057 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8059 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8067 return "fucompp\n\tfnstsw\t%0";
8069 return "fcompp\n\tfnstsw\t%0";
8082 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8084 static const char * const alt[24] =
8096 "fcomi\t{%y1, %0|%0, %y1}",
8097 "fcomip\t{%y1, %0|%0, %y1}",
8098 "fucomi\t{%y1, %0|%0, %y1}",
8099 "fucomip\t{%y1, %0|%0, %y1}",
8106 "fcom%z2\t%y2\n\tfnstsw\t%0",
8107 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8108 "fucom%z2\t%y2\n\tfnstsw\t%0",
8109 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8111 "ficom%z2\t%y2\n\tfnstsw\t%0",
8112 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8120 mask = eflags_p << 3;
8121 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
8122 mask |= unordered_p << 1;
8123 mask |= stack_top_dies;
8136 ix86_output_addr_vec_elt (FILE *file, int value)
8138 const char *directive = ASM_LONG;
8143 directive = ASM_QUAD;
8149 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8153 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8156 fprintf (file, "%s%s%d-%s%d\n",
8157 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8158 else if (HAVE_AS_GOTOFF_IN_DATA)
8159 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8161 else if (TARGET_MACHO)
8163 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8164 machopic_output_function_base_name (file);
8165 fprintf(file, "\n");
8169 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8170 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8173 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8177 ix86_expand_clear (rtx dest)
8181 /* We play register width games, which are only valid after reload. */
8182 if (!reload_completed)
8185 /* Avoid HImode and its attendant prefix byte. */
8186 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8187 dest = gen_rtx_REG (SImode, REGNO (dest));
8189 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8191 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8192 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8194 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8195 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8201 /* X is an unchanging MEM. If it is a constant pool reference, return
8202 the constant pool rtx, else NULL. */
8205 maybe_get_pool_constant (rtx x)
8207 x = ix86_delegitimize_address (XEXP (x, 0));
8209 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8210 return get_pool_constant (x);
8216 ix86_expand_move (enum machine_mode mode, rtx operands[])
8218 int strict = (reload_in_progress || reload_completed);
8220 enum tls_model model;
8225 model = tls_symbolic_operand (op1, Pmode);
8228 op1 = legitimize_tls_address (op1, model, true);
8229 op1 = force_operand (op1, op0);
8234 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8239 rtx temp = ((reload_in_progress
8240 || ((op0 && GET_CODE (op0) == REG)
8242 ? op0 : gen_reg_rtx (Pmode));
8243 op1 = machopic_indirect_data_reference (op1, temp);
8244 op1 = machopic_legitimize_pic_address (op1, mode,
8245 temp == op1 ? 0 : temp);
8247 else if (MACHOPIC_INDIRECT)
8248 op1 = machopic_indirect_data_reference (op1, 0);
8252 if (GET_CODE (op0) == MEM)
8253 op1 = force_reg (Pmode, op1);
8257 if (GET_CODE (temp) != REG)
8258 temp = gen_reg_rtx (Pmode);
8259 temp = legitimize_pic_address (op1, temp);
8264 #endif /* TARGET_MACHO */
8268 if (GET_CODE (op0) == MEM
8269 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8270 || !push_operand (op0, mode))
8271 && GET_CODE (op1) == MEM)
8272 op1 = force_reg (mode, op1);
8274 if (push_operand (op0, mode)
8275 && ! general_no_elim_operand (op1, mode))
8276 op1 = copy_to_mode_reg (mode, op1);
8278 /* Force large constants in 64bit compilation into register
8279 to get them CSEed. */
8280 if (TARGET_64BIT && mode == DImode
8281 && immediate_operand (op1, mode)
8282 && !x86_64_zero_extended_value (op1)
8283 && !register_operand (op0, mode)
8284 && optimize && !reload_completed && !reload_in_progress)
8285 op1 = copy_to_mode_reg (mode, op1);
8287 if (FLOAT_MODE_P (mode))
8289 /* If we are loading a floating point constant to a register,
8290 force the value to memory now, since we'll get better code
8291 out the back end. */
8295 else if (GET_CODE (op1) == CONST_DOUBLE)
8297 op1 = validize_mem (force_const_mem (mode, op1));
8298 if (!register_operand (op0, mode))
8300 rtx temp = gen_reg_rtx (mode);
8301 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8302 emit_move_insn (op0, temp);
8309 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
8313 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
8315 /* Force constants other than zero into memory. We do not know how
8316 the instructions used to build constants modify the upper 64 bits
8317 of the register, once we have that information we may be able
8318 to handle some of them more efficiently. */
8319 if ((reload_in_progress | reload_completed) == 0
8320 && register_operand (operands[0], mode)
8321 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
8322 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
8324 /* Make operand1 a register if it isn't already. */
8326 && !register_operand (operands[0], mode)
8327 && !register_operand (operands[1], mode))
8329 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8330 emit_move_insn (operands[0], temp);
8334 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8337 /* Attempt to expand a binary operator. Make the expansion closer to the
8338 actual machine, then just general_operand, which will allow 3 separate
8339 memory references (one output, two input) in a single insn. */
8342 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
8345 int matching_memory;
8346 rtx src1, src2, dst, op, clob;
8352 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8353 if (GET_RTX_CLASS (code) == 'c'
8354 && (rtx_equal_p (dst, src2)
8355 || immediate_operand (src1, mode)))
8362 /* If the destination is memory, and we do not have matching source
8363 operands, do things in registers. */
8364 matching_memory = 0;
8365 if (GET_CODE (dst) == MEM)
8367 if (rtx_equal_p (dst, src1))
8368 matching_memory = 1;
8369 else if (GET_RTX_CLASS (code) == 'c'
8370 && rtx_equal_p (dst, src2))
8371 matching_memory = 2;
8373 dst = gen_reg_rtx (mode);
8376 /* Both source operands cannot be in memory. */
8377 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8379 if (matching_memory != 2)
8380 src2 = force_reg (mode, src2);
8382 src1 = force_reg (mode, src1);
8385 /* If the operation is not commutable, source 1 cannot be a constant
8386 or non-matching memory. */
8387 if ((CONSTANT_P (src1)
8388 || (!matching_memory && GET_CODE (src1) == MEM))
8389 && GET_RTX_CLASS (code) != 'c')
8390 src1 = force_reg (mode, src1);
8392 /* If optimizing, copy to regs to improve CSE */
8393 if (optimize && ! no_new_pseudos)
8395 if (GET_CODE (dst) == MEM)
8396 dst = gen_reg_rtx (mode);
8397 if (GET_CODE (src1) == MEM)
8398 src1 = force_reg (mode, src1);
8399 if (GET_CODE (src2) == MEM)
8400 src2 = force_reg (mode, src2);
8403 /* Emit the instruction. */
8405 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8406 if (reload_in_progress)
8408 /* Reload doesn't know about the flags register, and doesn't know that
8409 it doesn't want to clobber it. We can only do this with PLUS. */
8416 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8417 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8420 /* Fix up the destination if needed. */
8421 if (dst != operands[0])
8422 emit_move_insn (operands[0], dst);
8425 /* Return TRUE or FALSE depending on whether the binary operator meets the
8426 appropriate constraints. */
8429 ix86_binary_operator_ok (enum rtx_code code,
8430 enum machine_mode mode ATTRIBUTE_UNUSED,
8433 /* Both source operands cannot be in memory. */
8434 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8436 /* If the operation is not commutable, source 1 cannot be a constant. */
8437 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
8439 /* If the destination is memory, we must have a matching source operand. */
8440 if (GET_CODE (operands[0]) == MEM
8441 && ! (rtx_equal_p (operands[0], operands[1])
8442 || (GET_RTX_CLASS (code) == 'c'
8443 && rtx_equal_p (operands[0], operands[2]))))
8445 /* If the operation is not commutable and the source 1 is memory, we must
8446 have a matching destination. */
8447 if (GET_CODE (operands[1]) == MEM
8448 && GET_RTX_CLASS (code) != 'c'
8449 && ! rtx_equal_p (operands[0], operands[1]))
8454 /* Attempt to expand a unary operator. Make the expansion closer to the
8455 actual machine, then just general_operand, which will allow 2 separate
8456 memory references (one output, one input) in a single insn. */
8459 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
8462 int matching_memory;
8463 rtx src, dst, op, clob;
8468 /* If the destination is memory, and we do not have matching source
8469 operands, do things in registers. */
8470 matching_memory = 0;
8471 if (GET_CODE (dst) == MEM)
8473 if (rtx_equal_p (dst, src))
8474 matching_memory = 1;
8476 dst = gen_reg_rtx (mode);
8479 /* When source operand is memory, destination must match. */
8480 if (!matching_memory && GET_CODE (src) == MEM)
8481 src = force_reg (mode, src);
8483 /* If optimizing, copy to regs to improve CSE */
8484 if (optimize && ! no_new_pseudos)
8486 if (GET_CODE (dst) == MEM)
8487 dst = gen_reg_rtx (mode);
8488 if (GET_CODE (src) == MEM)
8489 src = force_reg (mode, src);
8492 /* Emit the instruction. */
8494 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8495 if (reload_in_progress || code == NOT)
8497 /* Reload doesn't know about the flags register, and doesn't know that
8498 it doesn't want to clobber it. */
8505 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8506 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8509 /* Fix up the destination if needed. */
8510 if (dst != operands[0])
8511 emit_move_insn (operands[0], dst);
8514 /* Return TRUE or FALSE depending on whether the unary operator meets the
8515 appropriate constraints. */
8518 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8519 enum machine_mode mode ATTRIBUTE_UNUSED,
8520 rtx operands[2] ATTRIBUTE_UNUSED)
8522 /* If one of operands is memory, source and destination must match. */
8523 if ((GET_CODE (operands[0]) == MEM
8524 || GET_CODE (operands[1]) == MEM)
8525 && ! rtx_equal_p (operands[0], operands[1]))
8530 /* Return TRUE or FALSE depending on whether the first SET in INSN
8531 has source and destination with matching CC modes, and that the
8532 CC mode is at least as constrained as REQ_MODE. */
8535 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
8538 enum machine_mode set_mode;
8540 set = PATTERN (insn);
8541 if (GET_CODE (set) == PARALLEL)
8542 set = XVECEXP (set, 0, 0);
8543 if (GET_CODE (set) != SET)
8545 if (GET_CODE (SET_SRC (set)) != COMPARE)
8548 set_mode = GET_MODE (SET_DEST (set));
8552 if (req_mode != CCNOmode
8553 && (req_mode != CCmode
8554 || XEXP (SET_SRC (set), 1) != const0_rtx))
8558 if (req_mode == CCGCmode)
8562 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8566 if (req_mode == CCZmode)
8576 return (GET_MODE (SET_SRC (set)) == set_mode);
8579 /* Generate insn patterns to do an integer compare of OPERANDS. */
8582 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
8584 enum machine_mode cmpmode;
8587 cmpmode = SELECT_CC_MODE (code, op0, op1);
8588 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8590 /* This is very simple, but making the interface the same as in the
8591 FP case makes the rest of the code easier. */
8592 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8593 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8595 /* Return the test that should be put into the flags user, i.e.
8596 the bcc, scc, or cmov instruction. */
8597 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8600 /* Figure out whether to use ordered or unordered fp comparisons.
8601 Return the appropriate mode to use. */
8604 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
8606 /* ??? In order to make all comparisons reversible, we do all comparisons
8607 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8608 all forms trapping and nontrapping comparisons, we can make inequality
8609 comparisons trapping again, since it results in better code when using
8610 FCOM based compares. */
8611 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8615 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
8617 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8618 return ix86_fp_compare_mode (code);
8621 /* Only zero flag is needed. */
8623 case NE: /* ZF!=0 */
8625 /* Codes needing carry flag. */
8626 case GEU: /* CF=0 */
8627 case GTU: /* CF=0 & ZF=0 */
8628 case LTU: /* CF=1 */
8629 case LEU: /* CF=1 | ZF=1 */
8631 /* Codes possibly doable only with sign flag when
8632 comparing against zero. */
8633 case GE: /* SF=OF or SF=0 */
8634 case LT: /* SF<>OF or SF=1 */
8635 if (op1 == const0_rtx)
8638 /* For other cases Carry flag is not required. */
8640 /* Codes doable only with sign flag when comparing
8641 against zero, but we miss jump instruction for it
8642 so we need to use relational tests against overflow
8643 that thus needs to be zero. */
8644 case GT: /* ZF=0 & SF=OF */
8645 case LE: /* ZF=1 | SF<>OF */
8646 if (op1 == const0_rtx)
8650 /* strcmp pattern do (use flags) and combine may ask us for proper
8659 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8662 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
8664 enum rtx_code swapped_code = swap_condition (code);
8665 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8666 || (ix86_fp_comparison_cost (swapped_code)
8667 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8670 /* Swap, force into registers, or otherwise massage the two operands
8671 to a fp comparison. The operands are updated in place; the new
8672 comparison code is returned. */
8674 static enum rtx_code
8675 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
8677 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8678 rtx op0 = *pop0, op1 = *pop1;
8679 enum machine_mode op_mode = GET_MODE (op0);
8680 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8682 /* All of the unordered compare instructions only work on registers.
8683 The same is true of the XFmode compare instructions. The same is
8684 true of the fcomi compare instructions. */
8687 && (fpcmp_mode == CCFPUmode
8688 || op_mode == XFmode
8689 || op_mode == TFmode
8690 || ix86_use_fcomi_compare (code)))
8692 op0 = force_reg (op_mode, op0);
8693 op1 = force_reg (op_mode, op1);
8697 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8698 things around if they appear profitable, otherwise force op0
8701 if (standard_80387_constant_p (op0) == 0
8702 || (GET_CODE (op0) == MEM
8703 && ! (standard_80387_constant_p (op1) == 0
8704 || GET_CODE (op1) == MEM)))
8707 tmp = op0, op0 = op1, op1 = tmp;
8708 code = swap_condition (code);
8711 if (GET_CODE (op0) != REG)
8712 op0 = force_reg (op_mode, op0);
8714 if (CONSTANT_P (op1))
8716 if (standard_80387_constant_p (op1))
8717 op1 = force_reg (op_mode, op1);
8719 op1 = validize_mem (force_const_mem (op_mode, op1));
8723 /* Try to rearrange the comparison to make it cheaper. */
8724 if (ix86_fp_comparison_cost (code)
8725 > ix86_fp_comparison_cost (swap_condition (code))
8726 && (GET_CODE (op1) == REG || !no_new_pseudos))
8729 tmp = op0, op0 = op1, op1 = tmp;
8730 code = swap_condition (code);
8731 if (GET_CODE (op0) != REG)
8732 op0 = force_reg (op_mode, op0);
8740 /* Convert comparison codes we use to represent FP comparison to integer
8741 code that will result in proper branch. Return UNKNOWN if no such code
8743 static enum rtx_code
8744 ix86_fp_compare_code_to_integer (enum rtx_code code)
8773 /* Split comparison code CODE into comparisons we can do using branch
8774 instructions. BYPASS_CODE is comparison code for branch that will
8775 branch around FIRST_CODE and SECOND_CODE. If some of branches
8776 is not required, set value to NIL.
8777 We never require more than two branches. */
8779 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
8780 enum rtx_code *first_code,
8781 enum rtx_code *second_code)
8787 /* The fcomi comparison sets flags as follows:
8797 case GT: /* GTU - CF=0 & ZF=0 */
8798 case GE: /* GEU - CF=0 */
8799 case ORDERED: /* PF=0 */
8800 case UNORDERED: /* PF=1 */
8801 case UNEQ: /* EQ - ZF=1 */
8802 case UNLT: /* LTU - CF=1 */
8803 case UNLE: /* LEU - CF=1 | ZF=1 */
8804 case LTGT: /* EQ - ZF=0 */
8806 case LT: /* LTU - CF=1 - fails on unordered */
8808 *bypass_code = UNORDERED;
8810 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8812 *bypass_code = UNORDERED;
8814 case EQ: /* EQ - ZF=1 - fails on unordered */
8816 *bypass_code = UNORDERED;
8818 case NE: /* NE - ZF=0 - fails on unordered */
8820 *second_code = UNORDERED;
8822 case UNGE: /* GEU - CF=0 - fails on unordered */
8824 *second_code = UNORDERED;
8826 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8828 *second_code = UNORDERED;
8833 if (!TARGET_IEEE_FP)
8840 /* Return cost of comparison done fcom + arithmetics operations on AX.
8841 All following functions do use number of instructions as a cost metrics.
8842 In future this should be tweaked to compute bytes for optimize_size and
8843 take into account performance of various instructions on various CPUs. */
8845 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
8847 if (!TARGET_IEEE_FP)
8849 /* The cost of code output by ix86_expand_fp_compare. */
8877 /* Return cost of comparison done using fcomi operation.
8878 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8880 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
8882 enum rtx_code bypass_code, first_code, second_code;
8883 /* Return arbitrarily high cost when instruction is not supported - this
8884 prevents gcc from using it. */
8887 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8888 return (bypass_code != NIL || second_code != NIL) + 2;
8891 /* Return cost of comparison done using sahf operation.
8892 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8894 ix86_fp_comparison_sahf_cost (enum rtx_code code)
8896 enum rtx_code bypass_code, first_code, second_code;
8897 /* Return arbitrarily high cost when instruction is not preferred - this
8898 avoids gcc from using it. */
8899 if (!TARGET_USE_SAHF && !optimize_size)
8901 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8902 return (bypass_code != NIL || second_code != NIL) + 3;
8905 /* Compute cost of the comparison done using any method.
8906 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8908 ix86_fp_comparison_cost (enum rtx_code code)
8910 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8913 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8914 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8916 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8917 if (min > sahf_cost)
8919 if (min > fcomi_cost)
8924 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8927 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
8928 rtx *second_test, rtx *bypass_test)
8930 enum machine_mode fpcmp_mode, intcmp_mode;
8932 int cost = ix86_fp_comparison_cost (code);
8933 enum rtx_code bypass_code, first_code, second_code;
8935 fpcmp_mode = ix86_fp_compare_mode (code);
8936 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8939 *second_test = NULL_RTX;
8941 *bypass_test = NULL_RTX;
8943 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8945 /* Do fcomi/sahf based test when profitable. */
8946 if ((bypass_code == NIL || bypass_test)
8947 && (second_code == NIL || second_test)
8948 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8952 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8953 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8959 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8960 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8962 scratch = gen_reg_rtx (HImode);
8963 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8964 emit_insn (gen_x86_sahf_1 (scratch));
8967 /* The FP codes work out to act like unsigned. */
8968 intcmp_mode = fpcmp_mode;
8970 if (bypass_code != NIL)
8971 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8972 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8974 if (second_code != NIL)
8975 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8976 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8981 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8982 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8983 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8985 scratch = gen_reg_rtx (HImode);
8986 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8988 /* In the unordered case, we have to check C2 for NaN's, which
8989 doesn't happen to work out to anything nice combination-wise.
8990 So do some bit twiddling on the value we've got in AH to come
8991 up with an appropriate set of condition codes. */
8993 intcmp_mode = CCNOmode;
8998 if (code == GT || !TARGET_IEEE_FP)
9000 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9005 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9006 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9007 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
9008 intcmp_mode = CCmode;
9014 if (code == LT && TARGET_IEEE_FP)
9016 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9017 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
9018 intcmp_mode = CCmode;
9023 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
9029 if (code == GE || !TARGET_IEEE_FP)
9031 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
9036 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9037 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9044 if (code == LE && TARGET_IEEE_FP)
9046 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9047 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9048 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9049 intcmp_mode = CCmode;
9054 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9060 if (code == EQ && TARGET_IEEE_FP)
9062 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9063 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9064 intcmp_mode = CCmode;
9069 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9076 if (code == NE && TARGET_IEEE_FP)
9078 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9079 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9085 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9091 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9095 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9104 /* Return the test that should be put into the flags user, i.e.
9105 the bcc, scc, or cmov instruction. */
9106 return gen_rtx_fmt_ee (code, VOIDmode,
9107 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Expand the comparison CODE of the operands stashed in the globals
   ix86_compare_op0/ix86_compare_op1 and return an rtx suitable for use
   as the condition of a branch/setcc/cmov.  SECOND_TEST and BYPASS_TEST,
   when non-NULL, receive auxiliary comparison rtxes that some FP
   comparisons require (they are defaulted to NULL_RTX first).
   NOTE(review): this listing is elided (source line numbers jump), so
   only the visible statements are documented here.  */
9112 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
/* Operands come in through the ix86_compare_op globals, not parameters.  */
9115 op0 = ix86_compare_op0;
9116 op1 = ix86_compare_op1;
/* Clear the optional out-parameters before expansion.  */
9119 *second_test = NULL_RTX;
9121 *bypass_test = NULL_RTX;
/* Float modes take the FP comparison path (which may set the auxiliary
   tests); everything else is a plain integer compare.  */
9123 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9124 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9125 second_test, bypass_test)
9127 ret = ix86_expand_int_compare (code, op0, op1);
9132 /* Return true if the CODE will result in nontrivial jump sequence. */
9134 ix86_fp_jump_nontrivial_p (enum rtx_code code)
9136 enum rtx_code bypass_code, first_code, second_code;
9139 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9140 return bypass_code != NIL || second_code != NIL;
/* Emit a conditional branch to LABEL under comparison CODE of the
   ix86_compare_op0/op1 globals, dispatching on the operands' mode.
   NOTE(review): the listing is elided (line numbers jump); case labels
   and some braces of the switch below are not visible, so per-case
   comments mark the apparent mode cases rather than exact boundaries.  */
9144 ix86_expand_branch (enum rtx_code code, rtx label)
9148 switch (GET_MODE (ix86_compare_op0))
/* Simple case (presumably QI/HI/SImode): expand the compare and emit a
   single (set pc (if_then_else ...)) jump.  */
9154 tmp = ix86_expand_compare (code, NULL, NULL);
9155 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9156 gen_rtx_LABEL_REF (VOIDmode, label),
9158 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
/* Floating-point case: decide between one natural jump and a delayed
   compound insn.  */
9168 enum rtx_code bypass_code, first_code, second_code;
9170 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9173 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9175 /* Check whether we will use the natural sequence with one jump. If
9176 so, we can expand jump early. Otherwise delay expansion by
9177 creating compound insn to not confuse optimizers. */
9178 if (bypass_code == NIL && second_code == NIL
9181 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9182 gen_rtx_LABEL_REF (VOIDmode, label),
/* Delayed path: build a PARALLEL of the jump plus clobbers (FLAGS/FPSR
   registers 17/18 and, without fcomi, a scratch HImode register for
   fnstsw) so the optimizers see the full effect.  */
9187 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9188 ix86_compare_op0, ix86_compare_op1);
9189 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9190 gen_rtx_LABEL_REF (VOIDmode, label),
9192 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9194 use_fcomi = ix86_use_fcomi_compare (code);
9195 vec = rtvec_alloc (3 + !use_fcomi);
9196 RTVEC_ELT (vec, 0) = tmp;
9198 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9200 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9203 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9205 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9213 /* Expand DImode branch into multiple compare+branch. */
9215 rtx lo[2], hi[2], label2;
9216 enum rtx_code code1, code2, code3;
/* Canonicalize: keep any constant as the second operand.  */
9218 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9220 tmp = ix86_compare_op0;
9221 ix86_compare_op0 = ix86_compare_op1;
9222 ix86_compare_op1 = tmp;
9223 code = swap_condition (code);
9225 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9226 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9228 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9229 avoid two branches. This costs one extra insn, so disable when
9230 optimizing for size. */
9232 if ((code == EQ || code == NE)
9234 || hi[1] == const0_rtx || lo[1] == const0_rtx))
/* XOR the halves pairwise (skipping a zero constant half), OR the
   results, and recurse as a 32-bit compare against zero.  */
9239 if (hi[1] != const0_rtx)
9240 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9241 NULL_RTX, 0, OPTAB_WIDEN);
9244 if (lo[1] != const0_rtx)
9245 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9246 NULL_RTX, 0, OPTAB_WIDEN);
9248 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9249 NULL_RTX, 0, OPTAB_WIDEN);
9251 ix86_compare_op0 = tmp;
9252 ix86_compare_op1 = const0_rtx;
9253 ix86_expand_branch (code, label);
9257 /* Otherwise, if we are doing less-than or greater-or-equal-than,
9258 op1 is a constant and the low word is zero, then we can just
9259 examine the high word. */
9261 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9264 case LT: case LTU: case GE: case GEU:
9265 ix86_compare_op0 = hi[0];
9266 ix86_compare_op1 = hi[1];
9267 ix86_expand_branch (code, label);
9273 /* Otherwise, we need two or three jumps. */
9275 label2 = gen_label_rtx ();
/* code1/code2 test the high words, code3 (unsigned variant) the lows.  */
9278 code2 = swap_condition (code);
9279 code3 = unsigned_condition (code);
9283 case LT: case GT: case LTU: case GTU:
9286 case LE: code1 = LT; code2 = GT; break;
9287 case GE: code1 = GT; code2 = LT; break;
9288 case LEU: code1 = LTU; code2 = GTU; break;
9289 case GEU: code1 = GTU; code2 = LTU; break;
/* NIL marks a jump that can be omitted entirely.  */
9291 case EQ: code1 = NIL; code2 = NE; break;
9292 case NE: code2 = NIL; break;
9300 * if (hi(a) < hi(b)) goto true;
9301 * if (hi(a) > hi(b)) goto false;
9302 * if (lo(a) < lo(b)) goto true;
9306 ix86_compare_op0 = hi[0];
9307 ix86_compare_op1 = hi[1];
9310 ix86_expand_branch (code1, label);
9312 ix86_expand_branch (code2, label2);
9314 ix86_compare_op0 = lo[0];
9315 ix86_compare_op1 = lo[1];
9316 ix86_expand_branch (code3, label);
9319 emit_label (label2);
9328 /* Split branch based on floating point condition. */
/* Emits up to three jumps (bypass test, main condition, second test)
   for an FP comparison of OP1/OP2 under CODE, targeting TARGET1 on
   true and TARGET2 on false; TMP is a scratch passed through to
   ix86_expand_fp_compare.  Branch probabilities from
   split_branch_probability are attached as REG_BR_PROB notes.
   NOTE(review): listing is elided; some intermediate lines (e.g. the
   bodies setting `second`/`bypass` users) are not visible.  */
9330 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
9331 rtx target1, rtx target2, rtx tmp)
9334 rtx label = NULL_RTX;
9336 int bypass_probability = -1, second_probability = -1, probability = -1;
/* Canonicalize so the false target is the fall-through (pc); reverse
   the condition when TARGET2 is a real label.  */
9339 if (target2 != pc_rtx)
9342 code = reverse_condition_maybe_unordered (code);
9347 condition = ix86_expand_fp_compare (code, op1, op2,
9348 tmp, &second, &bypass);
9350 if (split_branch_probability >= 0)
9352 /* Distribute the probabilities across the jumps.
9353 Assume the BYPASS and SECOND to be always test
9355 probability = split_branch_probability;
9357 /* Value of 1 is low enough to make no need for probability
9358 to be updated. Later we may run some experiments and see
9359 if unordered values are more frequent in practice. */
9361 bypass_probability = 1;
9363 second_probability = 1;
/* Bypass jump: skips around the main test (e.g. for unordered
   operands), landing on a fresh label emitted at the end.  */
9365 if (bypass != NULL_RTX)
9367 label = gen_label_rtx ();
9368 i = emit_jump_insn (gen_rtx_SET
9370 gen_rtx_IF_THEN_ELSE (VOIDmode,
9372 gen_rtx_LABEL_REF (VOIDmode,
9375 if (bypass_probability >= 0)
9377 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9378 GEN_INT (bypass_probability),
/* Main conditional jump.  */
9381 i = emit_jump_insn (gen_rtx_SET
9383 gen_rtx_IF_THEN_ELSE (VOIDmode,
9384 condition, target1, target2)));
9385 if (probability >= 0)
9387 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9388 GEN_INT (probability),
/* Optional second test required by some comparison codes.  */
9390 if (second != NULL_RTX)
9392 i = emit_jump_insn (gen_rtx_SET
9394 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9396 if (second_probability >= 0)
9398 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9399 GEN_INT (second_probability),
/* Place the bypass landing label, if one was created.  */
9402 if (label != NULL_RTX)
/* Expand a setcc of comparison CODE (operands in the ix86_compare_op
   globals) into DEST.  Returns 1 on success ("DONE") and 0 on failure
   ("FAIL", caller falls back to generic code).  When the FP comparison
   needs an auxiliary test, the two QImode results are combined with
   AND/OR.  NOTE(review): listing is elided; tmp/tmpreg setup lines are
   partly missing.  */
9407 ix86_expand_setcc (enum rtx_code code, rtx dest)
9409 rtx ret, tmp, tmpreg, equiv;
9410 rtx second_test, bypass_test;
/* DImode comparisons take a multi-insn path this expander does not
   handle (condition elided here) -- bail out.  */
9412 if (GET_MODE (ix86_compare_op0) == DImode
9414 return 0; /* FAIL */
9416 if (GET_MODE (dest) != QImode)
9419 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9420 PUT_MODE (ret, QImode);
9425 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
/* Combine the auxiliary test with the primary result: a bypass test is
   reversed and ANDed in; a second test is ORed in.  */
9426 if (bypass_test || second_test)
9428 rtx test = second_test;
9430 rtx tmp2 = gen_reg_rtx (QImode);
9437 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9439 PUT_MODE (test, QImode);
9440 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9443 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9445 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9448 /* Attach a REG_EQUAL note describing the comparison result. */
9449 equiv = simplify_gen_relational (code, QImode,
9450 GET_MODE (ix86_compare_op0),
9451 ix86_compare_op0, ix86_compare_op1);
9452 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
9454 return 1; /* DONE */
9457 /* Expand comparison setting or clearing carry flag. Return true when successful
9458 and set pop for the operation. */
/* On success *POP is an LTU/GEU comparison rtx against the flags
   register (carry flag), suitable for adc/sbb-style sequences.
   NOTE(review): listing is elided; several else-branches and early
   returns are not visible, so the comments describe only visible
   transformations.  */
9460 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
/* Pick the mode from whichever operand has one (constants are VOIDmode).  */
9462 enum machine_mode mode =
9463 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9465 /* Do not handle DImode compares that go trought special path. Also we can't
9466 deal with FP compares yet. This is possible to add. */
9467 if ((mode == DImode && !TARGET_64BIT))
9469 if (FLOAT_MODE_P (mode))
9471 rtx second_test = NULL, bypass_test = NULL;
9472 rtx compare_op, compare_seq;
9474 /* Shortcut: following common codes never translate into carry flag compares. */
9475 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9476 || code == ORDERED || code == UNORDERED)
9479 /* These comparisons require zero flag; swap operands so they won't. */
9480 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9486 code = swap_condition (code);
9489 /* Try to expand the comparison and verify that we end up with carry flag
9490 based comparison. This is fails to be true only when we decide to expand
9491 comparison using arithmetic that is not too common scenario. */
9493 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9494 &second_test, &bypass_test);
9495 compare_seq = get_insns ();
/* Auxiliary tests mean the result is not a single carry-flag test.  */
9498 if (second_test || bypass_test)
9500 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9501 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9502 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9504 code = GET_CODE (compare_op);
9505 if (code != LTU && code != GEU)
9507 emit_insn (compare_seq);
9511 if (!INTEGRAL_MODE_P (mode))
9519 /* Convert a==0 into (unsigned)a<1. */
9522 if (op1 != const0_rtx)
9525 code = (code == EQ ? LTU : GEU);
9528 /* Convert a>b into b<a or a>=b-1. */
9531 if (GET_CODE (op1) == CONST_INT)
9533 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9534 /* Bail out on overflow. We still can swap operands but that
9535 would force loading of the constant into register. */
9536 if (op1 == const0_rtx
9537 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9539 code = (code == GTU ? GEU : LTU);
9546 code = (code == GTU ? LTU : GEU);
9550 /* Convert a>=0 into (unsigned)a<0x80000000. */
9553 if (mode == DImode || op1 != const0_rtx)
9555 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9556 code = (code == LT ? GEU : LTU);
/* Analogous conversion for LE against -1 (sign-bit test).  */
9560 if (mode == DImode || op1 != constm1_rtx)
9562 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9563 code = (code == LE ? GEU : LTU);
9569 /* Swapping operands may cause constant to appear as first operand. */
9570 if (!nonimmediate_operand (op0, VOIDmode))
9574 op0 = force_reg (mode, op0);
/* Hand the massaged compare to the generic expander and verify the
   result really is a carry-flag (LTU/GEU) test.  */
9576 ix86_compare_op0 = op0;
9577 ix86_compare_op1 = op1;
9578 *pop = ix86_expand_compare (code, NULL, NULL);
9579 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
/* Expand an integer conditional move: operands[0] = operands[1] (a
   comparison of the ix86_compare_op globals) ? operands[2] : operands[3].
   Returns 1 ("DONE") when insns were emitted, 0 ("FAIL") to let generic
   code handle it.  Tries, in order: branchless sbb/setcc arithmetic for
   constant arms, sign-bit tricks, lea-based forms, and finally a real
   cmov.  NOTE(review): the listing is heavily elided (line numbers
   jump); comments mark the visible strategy sections only.  */
9585 ix86_expand_int_movcc (rtx operands[])
9587 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9588 rtx compare_seq, compare_op;
9589 rtx second_test, bypass_test;
9590 enum machine_mode mode = GET_MODE (operands[0]);
9591 bool sign_bit_compare_p = false;;
/* Expand the compare into a pending sequence so it can be emitted
   later, at the point a strategy is chosen.  */
9594 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9595 compare_seq = get_insns ();
9598 compare_code = GET_CODE (compare_op);
/* GE/LT vs 0 and GT/LE vs -1 only look at the sign bit.  */
9600 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9601 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9602 sign_bit_compare_p = true;
9604 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9605 HImode insns, we'd be swallowed in word prefix ops. */
/* Strategy A: both arms are constants -> branchless arithmetic.  */
9607 if ((mode != HImode || TARGET_FAST_PREFIX)
9608 && (mode != DImode || TARGET_64BIT)
9609 && GET_CODE (operands[2]) == CONST_INT
9610 && GET_CODE (operands[3]) == CONST_INT)
9612 rtx out = operands[0];
9613 HOST_WIDE_INT ct = INTVAL (operands[2]);
9614 HOST_WIDE_INT cf = INTVAL (operands[3]);
9618 /* Sign bit compares are better done using shifts than we do by using
9620 if (sign_bit_compare_p
9621 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9622 ix86_compare_op1, &compare_op))
9624 /* Detect overlap between destination and compare sources. */
9627 if (!sign_bit_compare_p)
9631 compare_code = GET_CODE (compare_op);
9633 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9634 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9637 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9640 /* To simplify rest of code, restrict to the GEU case. */
9641 if (compare_code == LTU)
9643 HOST_WIDE_INT tmp = ct;
9646 compare_code = reverse_condition (compare_code);
9647 code = reverse_condition (code);
9652 PUT_CODE (compare_op,
9653 reverse_condition_maybe_unordered
9654 (GET_CODE (compare_op)));
9656 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9660 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9661 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9662 tmp = gen_reg_rtx (mode);
/* Materialize 0/-1 from the carry flag (sbb reg,reg pattern).  */
9665 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9667 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9671 if (code == GT || code == GE)
9672 code = reverse_condition (code);
9675 HOST_WIDE_INT tmp = ct;
/* Sign-bit path: emit_store_flag with -1 normalization.  */
9680 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9681 ix86_compare_op1, VOIDmode, 0, -1);
/* Combine the 0/-1 mask with the constants: the elided diff-based
   cases use PLUS / IOR / NOT+PLUS / NOT+AND+PLUS sequences.  */
9694 tmp = expand_simple_binop (mode, PLUS,
9696 copy_rtx (tmp), 1, OPTAB_DIRECT);
9707 tmp = expand_simple_binop (mode, IOR,
9709 copy_rtx (tmp), 1, OPTAB_DIRECT);
9711 else if (diff == -1 && ct)
9721 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9723 tmp = expand_simple_binop (mode, PLUS,
9724 copy_rtx (tmp), GEN_INT (cf),
9725 copy_rtx (tmp), 1, OPTAB_DIRECT);
9733 * andl cf - ct, dest
9743 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9746 tmp = expand_simple_binop (mode, AND,
9748 gen_int_mode (cf - ct, mode),
9749 copy_rtx (tmp), 1, OPTAB_DIRECT);
9751 tmp = expand_simple_binop (mode, PLUS,
9752 copy_rtx (tmp), GEN_INT (ct),
9753 copy_rtx (tmp), 1, OPTAB_DIRECT);
9756 if (!rtx_equal_p (tmp, out))
9757 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9759 return 1; /* DONE */
/* (Elided) normalization: possibly swap ct/cf and reverse the
   condition so later code can assume a canonical ordering.  */
9765 tmp = ct, ct = cf, cf = tmp;
9767 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9769 /* We may be reversing unordered compare to normal compare, that
9770 is not valid in general (we may convert non-trapping condition
9771 to trapping one), however on i386 we currently emit all
9772 comparisons unordered. */
9773 compare_code = reverse_condition_maybe_unordered (compare_code);
9774 code = reverse_condition_maybe_unordered (code);
9778 compare_code = reverse_condition (compare_code);
9779 code = reverse_condition (code);
/* Recognize sign-bit comparisons against 0 / -1 on integers.  */
9784 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9785 && GET_CODE (ix86_compare_op1) == CONST_INT)
9787 if (ix86_compare_op1 == const0_rtx
9788 && (code == LT || code == GE))
9789 compare_code = code;
9790 else if (ix86_compare_op1 == constm1_rtx)
9794 else if (code == GT)
9799 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9800 if (compare_code != NIL
9801 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9802 && (cf == -1 || ct == -1))
9804 /* If lea code below could be used, only optimize
9805 if it results in a 2 insn sequence. */
9807 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9808 || diff == 3 || diff == 5 || diff == 9)
9809 || (compare_code == LT && ct == -1)
9810 || (compare_code == GE && cf == -1))
9813 * notl op1 (if necessary)
9821 code = reverse_condition (code);
9824 out = emit_store_flag (out, code, ix86_compare_op0,
9825 ix86_compare_op1, VOIDmode, 0, -1);
9827 out = expand_simple_binop (mode, IOR,
9829 out, 1, OPTAB_DIRECT);
9830 if (out != operands[0])
9831 emit_move_insn (operands[0], out);
9833 return 1; /* DONE */
/* Strategy B: diff = ct - cf fits an lea scale (1,2,3,4,5,8,9) ->
   setcc + lea computes cf + setcc*diff without a branch.  */
9838 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9839 || diff == 3 || diff == 5 || diff == 9)
9840 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9841 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
9847 * lea cf(dest*(ct-cf)),dest
9851 * This also catches the degenerate setcc-only case.
9857 out = emit_store_flag (out, code, ix86_compare_op0,
9858 ix86_compare_op1, VOIDmode, 0, 1);
9861 /* On x86_64 the lea instruction operates on Pmode, so we need
9862 to get arithmetics done in proper mode to match. */
9864 tmp = copy_rtx (out);
9868 out1 = copy_rtx (out);
9869 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9873 tmp = gen_rtx_PLUS (mode, tmp, out1);
9879 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9882 if (!rtx_equal_p (tmp, out))
9885 out = force_operand (tmp, copy_rtx (out));
9887 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
9889 if (!rtx_equal_p (out, operands[0]))
9890 emit_move_insn (operands[0], copy_rtx (out));
9892 return 1; /* DONE */
9896 * General case: Jumpful:
9897 * xorl dest,dest cmpl op1, op2
9898 * cmpl op1, op2 movl ct, dest
9900 * decl dest movl cf, dest
9901 * andl (cf-ct),dest 1:
9906 * This is reasonably steep, but branch mispredict costs are
9907 * high on modern cpus, so consider failing only if optimizing
/* Strategy C: no cmov available (or QImode partial-reg stall) ->
   setcc; dec; and; add branchless sequence when branches are costly.  */
9911 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9912 && BRANCH_COST >= 2)
9918 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9919 /* We may be reversing unordered compare to normal compare,
9920 that is not valid in general (we may convert non-trapping
9921 condition to trapping one), however on i386 we currently
9922 emit all comparisons unordered. */
9923 code = reverse_condition_maybe_unordered (code);
9926 code = reverse_condition (code);
9927 if (compare_code != NIL)
9928 compare_code = reverse_condition (compare_code);
9932 if (compare_code != NIL)
9934 /* notl op1 (if needed)
9939 For x < 0 (resp. x <= -1) there will be no notl,
9940 so if possible swap the constants to get rid of the
9942 True/false will be -1/0 while code below (store flag
9943 followed by decrement) is 0/-1, so the constants need
9944 to be exchanged once more. */
9946 if (compare_code == GE || !cf)
9948 code = reverse_condition (code);
9953 HOST_WIDE_INT tmp = cf;
9958 out = emit_store_flag (out, code, ix86_compare_op0,
9959 ix86_compare_op1, VOIDmode, 0, -1);
9963 out = emit_store_flag (out, code, ix86_compare_op0,
9964 ix86_compare_op1, VOIDmode, 0, 1);
/* Turn 0/1 into 0/-1 (decrement), mask in cf-ct, then add ct.  */
9966 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
9967 copy_rtx (out), 1, OPTAB_DIRECT);
9970 out = expand_simple_binop (mode, AND, copy_rtx (out),
9971 gen_int_mode (cf - ct, mode),
9972 copy_rtx (out), 1, OPTAB_DIRECT);
9974 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
9975 copy_rtx (out), 1, OPTAB_DIRECT);
9976 if (!rtx_equal_p (out, operands[0]))
9977 emit_move_insn (operands[0], copy_rtx (out));
9979 return 1; /* DONE */
/* Strategy D: one special constant (0 or -1) and one variable arm ->
   recurse to load the constant, then AND/OR the variable in.  */
9983 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9985 /* Try a few things more with specific constants and a variable. */
9988 rtx var, orig_out, out, tmp;
9990 if (BRANCH_COST <= 2)
9991 return 0; /* FAIL */
9993 /* If one of the two operands is an interesting constant, load a
9994 constant with the above and mask it in with a logical operation. */
9996 if (GET_CODE (operands[2]) == CONST_INT)
9999 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
10000 operands[3] = constm1_rtx, op = and_optab;
10001 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
10002 operands[3] = const0_rtx, op = ior_optab;
10004 return 0; /* FAIL */
10006 else if (GET_CODE (operands[3]) == CONST_INT)
10009 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
10010 operands[2] = constm1_rtx, op = and_optab;
10011 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
10012 operands[2] = const0_rtx, op = ior_optab;
10014 return 0; /* FAIL */
10017 return 0; /* FAIL */
10019 orig_out = operands[0];
10020 tmp = gen_reg_rtx (mode);
10023 /* Recurse to get the constant loaded. */
10024 if (ix86_expand_int_movcc (operands) == 0)
10025 return 0; /* FAIL */
10027 /* Mask in the interesting variable. */
10028 out = expand_binop (mode, op, var, tmp, orig_out, 0,
10030 if (!rtx_equal_p (out, orig_out))
10031 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
10033 return 1; /* DONE */
10037 * For comparison with above,
/* Strategy E: real cmov.  Force operands into registers as the
   patterns require, guarding against overlap with the destination
   when auxiliary FP tests will reuse it.  */
10047 if (! nonimmediate_operand (operands[2], mode))
10048 operands[2] = force_reg (mode, operands[2]);
10049 if (! nonimmediate_operand (operands[3], mode))
10050 operands[3] = force_reg (mode, operands[3]);
10052 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10054 rtx tmp = gen_reg_rtx (mode);
10055 emit_move_insn (tmp, operands[3]);
10058 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10060 rtx tmp = gen_reg_rtx (mode);
10061 emit_move_insn (tmp, operands[2]);
10065 if (! register_operand (operands[2], VOIDmode)
10067 || ! register_operand (operands[3], VOIDmode)))
10068 operands[2] = force_reg (mode, operands[2]);
10071 && ! register_operand (operands[3], VOIDmode))
10072 operands[3] = force_reg (mode, operands[3]);
/* Emit the deferred compare, then the cmov; the extra conditional
   moves below fold in the bypass/second tests when present.  */
10074 emit_insn (compare_seq);
10075 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10076 gen_rtx_IF_THEN_ELSE (mode,
10077 compare_op, operands[2],
10080 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10081 gen_rtx_IF_THEN_ELSE (mode,
10083 copy_rtx (operands[3]),
10084 copy_rtx (operands[0]))));
10086 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10087 gen_rtx_IF_THEN_ELSE (mode,
10089 copy_rtx (operands[2]),
10090 copy_rtx (operands[0]))));
10092 return 1; /* DONE */
/* Expand a floating-point conditional move:
   operands[0] = operands[1] ? operands[2] : operands[3].
   First tries SSE min/max and SSE conditional-move patterns; otherwise
   falls back to x87 fcmov, inserting a setcc when the condition is not
   directly supported.  NOTE(review): listing is elided; some return
   statements and else-arms are not visible.  */
10096 ix86_expand_fp_movcc (rtx operands[])
10098 enum rtx_code code;
10100 rtx compare_op, second_test, bypass_test;
10102 /* For SF/DFmode conditional moves based on comparisons
10103 in same mode, we may want to use SSE min/max instructions. */
10104 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
10105 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
10106 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
10107 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
10108 && (!TARGET_IEEE_FP
10109 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
10110 /* We may be called from the post-reload splitter. */
10111 && (!REG_P (operands[0])
10112 || SSE_REG_P (operands[0])
10113 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
10115 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
10116 code = GET_CODE (operands[1]);
10118 /* See if we have (cross) match between comparison operands and
10119 conditional move operands. */
10120 if (rtx_equal_p (operands[2], op1))
10125 code = reverse_condition_maybe_unordered (code);
10127 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
10129 /* Check for min operation. */
10130 if (code == LT || code == UNLE)
/* minss/minsd require a register destination and first operand.  */
10138 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10139 if (memory_operand (op0, VOIDmode))
10140 op0 = force_reg (GET_MODE (operands[0]), op0);
10141 if (GET_MODE (operands[0]) == SFmode)
10142 emit_insn (gen_minsf3 (operands[0], op0, op1));
10144 emit_insn (gen_mindf3 (operands[0], op0, op1));
10147 /* Check for max operation. */
10148 if (code == GT || code == UNGE)
10156 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10157 if (memory_operand (op0, VOIDmode))
10158 op0 = force_reg (GET_MODE (operands[0]), op0);
10159 if (GET_MODE (operands[0]) == SFmode)
10160 emit_insn (gen_maxsf3 (operands[0], op0, op1));
10162 emit_insn (gen_maxdf3 (operands[0], op0, op1));
10166 /* Manage condition to be sse_comparison_operator. In case we are
10167 in non-ieee mode, try to canonicalize the destination operand
10168 to be first in the comparison - this helps reload to avoid extra
10170 if (!sse_comparison_operator (operands[1], VOIDmode)
10171 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
10173 rtx tmp = ix86_compare_op0;
10174 ix86_compare_op0 = ix86_compare_op1;
10175 ix86_compare_op1 = tmp;
10176 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
10177 VOIDmode, ix86_compare_op0,
10180 /* Similarly try to manage result to be first operand of conditional
10181 move. We also don't support the NE comparison on SSE, so try to
10183 if ((rtx_equal_p (operands[0], operands[3])
10184 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
10185 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
10187 rtx tmp = operands[2];
10188 operands[2] = operands[3];
10190 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10191 (GET_CODE (operands[1])),
10192 VOIDmode, ix86_compare_op0,
/* Emit the SSE conditional-move pattern for the operand mode.  */
10195 if (GET_MODE (operands[0]) == SFmode)
10196 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
10197 operands[2], operands[3],
10198 ix86_compare_op0, ix86_compare_op1));
10200 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
10201 operands[2], operands[3],
10202 ix86_compare_op0, ix86_compare_op1));
10206 /* The floating point conditional move instructions don't directly
10207 support conditions resulting from a signed integer comparison. */
10209 code = GET_CODE (operands[1]);
10210 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10212 /* The floating point conditional move instructions don't directly
10213 support signed integer comparisons. */
/* If fcmov cannot encode the condition, compute it into a QImode
   register with setcc and compare that against zero instead.  */
10215 if (!fcmov_comparison_operator (compare_op, VOIDmode))
10217 if (second_test != NULL || bypass_test != NULL)
10219 tmp = gen_reg_rtx (QImode);
10220 ix86_expand_setcc (code, tmp);
10222 ix86_compare_op0 = tmp;
10223 ix86_compare_op1 = const0_rtx;
10224 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
/* Copy an arm into a temporary when an auxiliary test would read the
   destination after the first conditional move clobbers it.  */
10226 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10228 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10229 emit_move_insn (tmp, operands[3]);
10232 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10234 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10235 emit_move_insn (tmp, operands[2]);
/* Main fcmov, then optional extra fcmovs for bypass/second tests.  */
10239 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10240 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10245 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10246 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10251 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10252 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10260 /* Expand conditional increment or decrement using adb/sbb instructions.
10261 The default case using setcc followed by the conditional move can be
10262 done by generic code. */
/* operands[0] = operands[2] +/- (operands[1] ? 1 : 0), where operands[3]
   is const1_rtx or constm1_rtx.  Returns 1 ("DONE") on success, 0
   ("FAIL") when the comparison cannot be turned into a carry-flag test.
   NOTE(review): listing is elided; the FAIL returns and some case
   labels of the switches are not visible.  */
10264 ix86_expand_int_addcc (rtx operands[])
10266 enum rtx_code code = GET_CODE (operands[1]);
10268 rtx val = const0_rtx;
10269 bool fpcmp = false;
10270 enum machine_mode mode = GET_MODE (operands[0]);
/* Only +1 / -1 adjustments map onto adc/sbb with a zero addend.  */
10272 if (operands[3] != const1_rtx
10273 && operands[3] != constm1_rtx)
10275 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10276 ix86_compare_op1, &compare_op))
10278 code = GET_CODE (compare_op);
10280 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10281 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10284 code = ix86_fp_compare_code_to_integer (code);
/* (Elided context) reverse the condition, using the unordered-safe
   variant for FP comparisons.  */
10291 PUT_CODE (compare_op,
10292 reverse_condition_maybe_unordered
10293 (GET_CODE (compare_op)));
10295 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10297 PUT_MODE (compare_op, mode);
10299 /* Construct either adc or sbb insn. */
10300 if ((code == LTU) == (operands[3] == constm1_rtx))
/* sbb: subtract the carry (decrement when the condition holds).  */
10302 switch (GET_MODE (operands[0]))
10305 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10308 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10311 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10314 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
/* adc: add the carry (increment when the condition holds).  */
10322 switch (GET_MODE (operands[0]))
10325 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10328 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10331 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10334 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10340 return 1; /* DONE */
10344 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10345 works for floating pointer parameters and nonoffsetable memories.
10346 For pushes, it returns just stack offsets; the values will be saved
10347 in the right order. Maximally three parts are generated. */
/* Fills PARTS[0..size-1] from OPERAND of MODE and (presumably, return
   statement elided) returns the part count: word-sized pieces -- SImode
   on 32-bit targets, DImode-leading pieces on 64-bit targets.
   NOTE(review): listing is elided; abort/return lines and some braces
   are not visible.  */
10350 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
/* Part count: 32-bit target counts SImode words (TFmode -> 3);
   64-bit target counts 8-byte words, rounding up.  */
10355 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
10357 size = (GET_MODE_SIZE (mode) + 4) / 8;
/* MMX registers cannot be split into word parts.  */
10359 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10361 if (size < 2 || size > 3)
10364 /* Optimize constant pool reference to immediates. This is used by fp
10365 moves, that force all constants to memory to allow combining. */
10366 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10368 rtx tmp = maybe_get_pool_constant (operand);
10373 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10375 /* The only non-offsetable memories we handle are pushes. */
10376 if (! push_operand (operand, VOIDmode))
/* For a push every part is the same Pmode push destination; the
   caller relies on push ordering (see function comment).  */
10379 operand = copy_rtx (operand);
10380 PUT_MODE (operand, Pmode);
10381 parts[0] = parts[1] = parts[2] = operand;
10383 else if (!TARGET_64BIT)
10385 if (mode == DImode)
10386 split_di (&operand, 1, &parts[0], &parts[1]);
/* Non-DImode (FP) value on a 32-bit target: split by operand kind.  */
10389 if (REG_P (operand))
/* Pre-reload pseudo split into consecutive SImode hard-reg views.  */
10391 if (!reload_completed)
10393 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10394 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10396 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10398 else if (offsettable_memref_p (operand))
10400 operand = adjust_address (operand, SImode, 0);
10401 parts[0] = operand;
10402 parts[1] = adjust_address (operand, SImode, 4);
10404 parts[2] = adjust_address (operand, SImode, 8);
/* FP constant: convert through the target representation to SImode
   immediates.  */
10406 else if (GET_CODE (operand) == CONST_DOUBLE)
10411 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10416 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10417 parts[2] = gen_int_mode (l[2], SImode);
10420 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10425 parts[1] = gen_int_mode (l[1], SImode);
10426 parts[0] = gen_int_mode (l[0], SImode);
/* 64-bit target: TImode splits in two; XF/TFmode splits into a DImode
   low part and SImode high part.  */
10434 if (mode == TImode)
10435 split_ti (&operand, 1, &parts[0], &parts[1]);
10436 if (mode == XFmode || mode == TFmode)
10438 if (REG_P (operand))
10440 if (!reload_completed)
10442 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10443 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10445 else if (offsettable_memref_p (operand))
10447 operand = adjust_address (operand, DImode, 0);
10448 parts[0] = operand;
10449 parts[1] = adjust_address (operand, SImode, 8);
10451 else if (GET_CODE (operand) == CONST_DOUBLE)
10456 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10457 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10458 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10459 if (HOST_BITS_PER_WIDE_INT >= 64)
/* Recombine the two 32-bit halves into one HOST_WIDE_INT using
   (x << 31) << 1 instead of x << 32 to stay warning-free.  */
10462 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10463 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10466 parts[0] = immed_double_const (l[0], l[1], DImode);
10467 parts[1] = gen_int_mode (l[2], SImode);
10477 /* Emit insns to perform a move or push of DI, DF, and XF values.
10478 Return false when normal moves are needed; true when all required
10479 insns have been emitted. Operands 2-4 contain the input values
10480 int the correct order; operands 5-7 contain the output values. */
10483 ix86_split_long_move (rtx operands[])
10488 int collisions = 0;
10489 enum machine_mode mode = GET_MODE (operands[0]);
10491 /* The DFmode expanders may ask us to move double.
10492 For 64bit target this is single move. By hiding the fact
10493 here we simplify i386.md splitters. */
10494 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10496 /* Optimize constant pool reference to immediates. This is used by
10497 fp moves, that force all constants to memory to allow combining. */
10499 if (GET_CODE (operands[1]) == MEM
10500 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10501 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10502 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10503 if (push_operand (operands[0], VOIDmode))
10505 operands[0] = copy_rtx (operands[0]);
10506 PUT_MODE (operands[0], Pmode);
10509 operands[0] = gen_lowpart (DImode, operands[0]);
10510 operands[1] = gen_lowpart (DImode, operands[1]);
10511 emit_move_insn (operands[0], operands[1]);
10515 /* The only non-offsettable memory we handle is push. */
10516 if (push_operand (operands[0], VOIDmode))
10518 else if (GET_CODE (operands[0]) == MEM
10519 && ! offsettable_memref_p (operands[0]))
10522 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10523 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10525 /* When emitting push, take care for source operands on the stack. */
10526 if (push && GET_CODE (operands[1]) == MEM
10527 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10530 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10531 XEXP (part[1][2], 0));
10532 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10533 XEXP (part[1][1], 0));
10536 /* We need to do copy in the right order in case an address register
10537 of the source overlaps the destination. */
10538 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10540 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10542 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10545 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10548 /* Collision in the middle part can be handled by reordering. */
10549 if (collisions == 1 && nparts == 3
10550 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10553 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10554 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10557 /* If there are more collisions, we can't handle it by reordering.
10558 Do an lea to the last part and use only one colliding move. */
10559 else if (collisions > 1)
10565 base = part[0][nparts - 1];
10567 /* Handle the case when the last part isn't valid for lea.
10568 Happens in 64-bit mode storing the 12-byte XFmode. */
10569 if (GET_MODE (base) != Pmode)
10570 base = gen_rtx_REG (Pmode, REGNO (base));
10572 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10573 part[1][0] = replace_equiv_address (part[1][0], base);
10574 part[1][1] = replace_equiv_address (part[1][1],
10575 plus_constant (base, UNITS_PER_WORD));
10577 part[1][2] = replace_equiv_address (part[1][2],
10578 plus_constant (base, 8));
10588 /* We use only first 12 bytes of TFmode value, but for pushing we
10589 are required to adjust stack as if we were pushing real 16byte
10591 if (mode == TFmode && !TARGET_64BIT)
10592 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
10594 emit_move_insn (part[0][2], part[1][2]);
10599 /* In 64bit mode we don't have 32bit push available. In case this is
10600 register, it is OK - we will just use larger counterpart. We also
10601 retype memory - these comes from attempt to avoid REX prefix on
10602 moving of second half of TFmode value. */
10603 if (GET_MODE (part[1][1]) == SImode)
10605 if (GET_CODE (part[1][1]) == MEM)
10606 part[1][1] = adjust_address (part[1][1], DImode, 0);
10607 else if (REG_P (part[1][1]))
10608 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10611 if (GET_MODE (part[1][0]) == SImode)
10612 part[1][0] = part[1][1];
10615 emit_move_insn (part[0][1], part[1][1]);
10616 emit_move_insn (part[0][0], part[1][0]);
10620 /* Choose correct order to not overwrite the source before it is copied. */
10621 if ((REG_P (part[0][0])
10622 && REG_P (part[1][1])
10623 && (REGNO (part[0][0]) == REGNO (part[1][1])
10625 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10627 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10631 operands[2] = part[0][2];
10632 operands[3] = part[0][1];
10633 operands[4] = part[0][0];
10634 operands[5] = part[1][2];
10635 operands[6] = part[1][1];
10636 operands[7] = part[1][0];
10640 operands[2] = part[0][1];
10641 operands[3] = part[0][0];
10642 operands[5] = part[1][1];
10643 operands[6] = part[1][0];
10650 operands[2] = part[0][0];
10651 operands[3] = part[0][1];
10652 operands[4] = part[0][2];
10653 operands[5] = part[1][0];
10654 operands[6] = part[1][1];
10655 operands[7] = part[1][2];
10659 operands[2] = part[0][0];
10660 operands[3] = part[0][1];
10661 operands[5] = part[1][0];
10662 operands[6] = part[1][1];
10665 emit_move_insn (operands[2], operands[5]);
10666 emit_move_insn (operands[3], operands[6]);
10668 emit_move_insn (operands[4], operands[7]);
/* Split a DImode left shift into SImode operations for 32-bit targets.
   OPERANDS holds destination/source/count; SCRATCH, if non-NULL, is a
   spare SImode register usable after reload for the variable-count
   case.  (Elided listing: gaps in the numbering are omitted lines.)  */
10674 ix86_split_ashldi (rtx *operands, rtx scratch)
10676 rtx low[2], high[2];
10679 if (GET_CODE (operands[2]) == CONST_INT)
10681 split_di (operands, 2, low, high);
/* Only the low 6 bits of the count are significant for a 64-bit shift.  */
10682 count = INTVAL (operands[2]) & 63;
/* Count >= 32: low word becomes zero, old low word moves to high.  */
10686 emit_move_insn (high[0], low[1]);
10687 emit_move_insn (low[0], const0_rtx);
10690 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
/* Count < 32: shld propagates low bits into the high word.  */
10694 if (!rtx_equal_p (operands[0], operands[1]))
10695 emit_move_insn (operands[0], operands[1]);
10696 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10697 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
/* Variable count.  */
10702 if (!rtx_equal_p (operands[0], operands[1]))
10703 emit_move_insn (operands[0], operands[1]);
10705 split_di (operands, 1, low, high);
10707 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10708 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
/* Fix up the count >= 32 case: with cmove use a zeroed scratch,
   otherwise fall back to the branching adjust pattern.  */
10710 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10712 if (! no_new_pseudos)
10713 scratch = force_reg (SImode, const0_rtx);
10715 emit_move_insn (scratch, const0_rtx);
10717 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10721 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
/* Split a DImode arithmetic right shift into SImode operations for
   32-bit targets.  The sign is replicated by shifting the high word
   right by 31.  (Elided listing: numbering gaps are omitted lines.)  */
10726 ix86_split_ashrdi (rtx *operands, rtx scratch)
10728 rtx low[2], high[2];
10731 if (GET_CODE (operands[2]) == CONST_INT)
10733 split_di (operands, 2, low, high);
10734 count = INTVAL (operands[2]) & 63;
/* Count >= 32: old high word becomes the low word; the new high word
   is all sign bits (high >> 31).  */
10738 emit_move_insn (low[0], high[1]);
10740 if (! reload_completed)
10741 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
/* After reload no new pseudo may be used; compute the sign in place.  */
10744 emit_move_insn (high[0], low[0]);
10745 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10749 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
/* Count < 32: shrd moves high bits down into the low word.  */
10753 if (!rtx_equal_p (operands[0], operands[1]))
10754 emit_move_insn (operands[0], operands[1]);
10755 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10756 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
/* Variable count.  */
10761 if (!rtx_equal_p (operands[0], operands[1]))
10762 emit_move_insn (operands[0], operands[1]);
10764 split_di (operands, 1, low, high);
10766 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10767 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
/* Fix up count >= 32: scratch holds the sign-extension word.  */
10769 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10771 if (! no_new_pseudos)
10772 scratch = gen_reg_rtx (SImode);
10773 emit_move_insn (scratch, high[0]);
10774 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10775 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10779 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
/* Split a DImode logical right shift into SImode operations for 32-bit
   targets.  Like ix86_split_ashrdi but zero-fills instead of
   sign-extending.  (Elided listing: numbering gaps are omitted lines.)  */
10784 ix86_split_lshrdi (rtx *operands, rtx scratch)
10786 rtx low[2], high[2];
10789 if (GET_CODE (operands[2]) == CONST_INT)
10791 split_di (operands, 2, low, high);
10792 count = INTVAL (operands[2]) & 63;
/* Count >= 32: old high word moves down, new high word is zero.  */
10796 emit_move_insn (low[0], high[1]);
10797 emit_move_insn (high[0], const0_rtx);
10800 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
/* Count < 32.  */
10804 if (!rtx_equal_p (operands[0], operands[1]))
10805 emit_move_insn (operands[0], operands[1]);
10806 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10807 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
/* Variable count.  */
10812 if (!rtx_equal_p (operands[0], operands[1]))
10813 emit_move_insn (operands[0], operands[1]);
10815 split_di (operands, 1, low, high);
10817 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10818 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10820 /* Heh. By reversing the arguments, we can reuse this pattern. */
10821 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10823 if (! no_new_pseudos)
10824 scratch = force_reg (SImode, const0_rtx);
10826 emit_move_insn (scratch, const0_rtx);
10828 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10832 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10836 /* Helper function for the string operations below. Test VARIABLE whether
10837 it is aligned to VALUE bytes. If so, jump to the returned label; the
   caller emits the misalignment fixup between this call and the label. */
10839 ix86_expand_aligntest (rtx variable, int value)
10841 rtx label = gen_label_rtx ();
10842 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10843 if (GET_MODE (variable) == DImode)
10844 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10846 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
/* (VARIABLE & VALUE) == 0 means aligned: branch to LABEL.  */
10847 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10852 /* Adjust COUNTER by the VALUE, i.e. subtract VALUE by emitting an
   add of -VALUE in the counter register's own mode. */
10854 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
10856 if (GET_MODE (countreg) == DImode)
10857 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10859 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10862 /* Zero extend possibly SImode EXP to Pmode register. */
10864 ix86_zero_extend_to_Pmode (rtx exp)
/* VOIDmode means EXP is a constant: just force it into a Pmode reg.  */
10867 if (GET_MODE (exp) == VOIDmode)
10868 return force_reg (Pmode, exp);
/* Already Pmode: copy to a fresh register.  */
10869 if (GET_MODE (exp) == Pmode)
10870 return copy_to_mode_reg (Pmode, exp);
/* Otherwise SImode on a 64-bit target: explicit zero extension.  */
10871 r = gen_reg_rtx (Pmode);
10872 emit_insn (gen_zero_extendsidi2 (r, exp));
10876 /* Expand string move (memcpy) operation. Use i386 string operations when
10877 profitable. expand_clrstr contains similar code.
   DST/SRC are MEMs, COUNT_EXP the byte count, ALIGN_EXP the known
   alignment.  (Elided listing: numbering gaps are omitted lines.)  */
10879 ix86_expand_movstr (rtx dst, rtx src, rtx count_exp, rtx align_exp)
10881 rtx srcreg, destreg, countreg;
10882 enum machine_mode counter_mode;
10883 HOST_WIDE_INT align = 0;
10884 unsigned HOST_WIDE_INT count = 0;
10887 if (GET_CODE (align_exp) == CONST_INT)
10888 align = INTVAL (align_exp);
10890 /* Can't use any of this if the user has appropriated esi or edi. */
10891 if (global_regs[4] || global_regs[5])
10894 /* This simple hack avoids all inlining code and simplifies code below. */
10895 if (!TARGET_ALIGN_STRINGOPS)
10898 if (GET_CODE (count_exp) == CONST_INT)
10900 count = INTVAL (count_exp);
10901 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
10905 /* Figure out proper mode for counter. For 32bits it is always SImode,
10906 for 64bits use SImode when possible, otherwise DImode.
10907 Set count to number of bytes copied when known at compile time. */
10908 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10909 || x86_64_zero_extended_value (count_exp))
10910 counter_mode = SImode;
10912 counter_mode = DImode;
10916 if (counter_mode != SImode && counter_mode != DImode)
10919 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10920 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
/* String insns require the direction flag to be clear.  */
10922 emit_insn (gen_cld ());
10924 /* When optimizing for size emit simple rep ; movsb instruction for
10925 counts not divisible by 4. */
10927 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10929 countreg = ix86_zero_extend_to_Pmode (count_exp)
10931 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
10932 destreg, srcreg, countreg));
10934 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
10935 destreg, srcreg, countreg));
10938 /* For constant aligned (or small unaligned) copies use rep movsl
10939 followed by code copying the rest. For PentiumPro ensure 8 byte
10940 alignment to allow rep movsl acceleration. */
10942 else if (count != 0
10944 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10945 || optimize_size || count < (unsigned int) 64)
10947 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10948 if (count & ~(size - 1))
/* Word count = byte count / SIZE; mask guards against a huge 32-bit
   count overflowing the shifted value on 32-bit hosts.  */
10950 countreg = copy_to_mode_reg (counter_mode,
10951 GEN_INT ((count >> (size == 4 ? 2 : 3))
10952 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10953 countreg = ix86_zero_extend_to_Pmode (countreg);
10957 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
10958 destreg, srcreg, countreg));
10960 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
10961 destreg, srcreg, countreg));
10964 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
10965 destreg, srcreg, countreg));
/* Copy the leftover 1-7 tail bytes with single string moves.  */
10967 if (size == 8 && (count & 0x04))
10968 emit_insn (gen_strmovsi (destreg, srcreg));
10970 emit_insn (gen_strmovhi (destreg, srcreg));
10972 emit_insn (gen_strmovqi (destreg, srcreg));
10974 /* The generic code based on the glibc implementation:
10975 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10976 allowing accelerated copying there)
10977 - copy the data using rep movsl
10978 - copy the rest. */
10983 int desired_alignment = (TARGET_PENTIUMPRO
10984 && (count == 0 || count >= (unsigned int) 260)
10985 ? 8 : UNITS_PER_WORD);
10987 /* In case we don't know anything about the alignment, default to
10988 library version, since it is usually equally fast and result in
10991 Also emit call when we know that the count is large and call overhead
10992 will not be important. */
10993 if (!TARGET_INLINE_ALL_STRINGOPS
10994 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11000 if (TARGET_SINGLE_STRINGOP)
11001 emit_insn (gen_cld ());
11003 countreg2 = gen_reg_rtx (Pmode);
11004 countreg = copy_to_mode_reg (counter_mode, count_exp);
11006 /* We don't use loops to align destination and to copy parts smaller
11007 than 4 bytes, because gcc is able to optimize such code better (in
11008 the case the destination or the count really is aligned, gcc is often
11009 able to predict the branches) and also it is friendlier to the
11010 hardware branch prediction.
11012 Using loops is beneficial for generic case, because we can
11013 handle small counts using the loops. Many CPUs (such as Athlon)
11014 have large REP prefix setup costs.
11016 This is quite costly. Maybe we can revisit this decision later or
11017 add some customizability to this code. */
/* Tiny variable counts: skip the alignment prologue entirely.  */
11019 if (count == 0 && align < desired_alignment)
11021 label = gen_label_rtx ();
11022 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11023 LEU, 0, counter_mode, 1, label);
/* Align destination byte-by-byte: 1, then 2, then 4 bytes.  */
11027 rtx label = ix86_expand_aligntest (destreg, 1);
11028 emit_insn (gen_strmovqi (destreg, srcreg));
11029 ix86_adjust_counter (countreg, 1);
11030 emit_label (label);
11031 LABEL_NUSES (label) = 1;
11035 rtx label = ix86_expand_aligntest (destreg, 2);
11036 emit_insn (gen_strmovhi (destreg, srcreg));
11037 ix86_adjust_counter (countreg, 2);
11038 emit_label (label);
11039 LABEL_NUSES (label) = 1;
11041 if (align <= 4 && desired_alignment > 4)
11043 rtx label = ix86_expand_aligntest (destreg, 4);
11044 emit_insn (gen_strmovsi (destreg, srcreg));
11045 ix86_adjust_counter (countreg, 4);
11046 emit_label (label);
11047 LABEL_NUSES (label) = 1;
11050 if (label && desired_alignment > 4 && !TARGET_64BIT)
11052 emit_label (label);
11053 LABEL_NUSES (label) = 1;
11056 if (!TARGET_SINGLE_STRINGOP)
11057 emit_insn (gen_cld ());
/* Bulk copy: rep movs with the count converted to words.  */
11060 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11062 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
11063 destreg, srcreg, countreg2));
11067 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
11068 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
11069 destreg, srcreg, countreg2));
11074 emit_label (label);
11075 LABEL_NUSES (label) = 1;
/* Epilogue: copy the remaining tail (4/2/1 bytes), conditionally when
   the count is not known at compile time.  */
11077 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11078 emit_insn (gen_strmovsi (destreg, srcreg));
11079 if ((align <= 4 || count == 0) && TARGET_64BIT)
11081 rtx label = ix86_expand_aligntest (countreg, 4);
11082 emit_insn (gen_strmovsi (destreg, srcreg));
11083 emit_label (label);
11084 LABEL_NUSES (label) = 1;
11086 if (align > 2 && count != 0 && (count & 2))
11087 emit_insn (gen_strmovhi (destreg, srcreg));
11088 if (align <= 2 || count == 0)
11090 rtx label = ix86_expand_aligntest (countreg, 2);
11091 emit_insn (gen_strmovhi (destreg, srcreg));
11092 emit_label (label);
11093 LABEL_NUSES (label) = 1;
11095 if (align > 1 && count != 0 && (count & 1))
11096 emit_insn (gen_strmovqi (destreg, srcreg));
11097 if (align <= 1 || count == 0)
11099 rtx label = ix86_expand_aligntest (countreg, 1);
11100 emit_insn (gen_strmovqi (destreg, srcreg));
11101 emit_label (label);
11102 LABEL_NUSES (label) = 1;
11106 insns = get_insns ();
11109 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
11114 /* Expand string clear operation (bzero). Use i386 string operations when
11115 profitable. expand_movstr contains similar code.
   SRC is the destination MEM (named "src" historically), COUNT_EXP the
   byte count, ALIGN_EXP the known alignment.  (Elided listing:
   numbering gaps are omitted lines.)  */
11117 ix86_expand_clrstr (rtx src, rtx count_exp, rtx align_exp)
11119 rtx destreg, zeroreg, countreg;
11120 enum machine_mode counter_mode;
11121 HOST_WIDE_INT align = 0;
11122 unsigned HOST_WIDE_INT count = 0;
11124 if (GET_CODE (align_exp) == CONST_INT)
11125 align = INTVAL (align_exp);
11127 /* Can't use any of this if the user has appropriated esi. */
11128 if (global_regs[4])
11131 /* This simple hack avoids all inlining code and simplifies code below. */
11132 if (!TARGET_ALIGN_STRINGOPS)
11135 if (GET_CODE (count_exp) == CONST_INT)
11137 count = INTVAL (count_exp);
11138 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11141 /* Figure out proper mode for counter. For 32bits it is always SImode,
11142 for 64bits use SImode when possible, otherwise DImode.
11143 Set count to number of bytes copied when known at compile time. */
11144 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11145 || x86_64_zero_extended_value (count_exp))
11146 counter_mode = SImode;
11148 counter_mode = DImode;
11150 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
/* String insns require the direction flag to be clear.  */
11152 emit_insn (gen_cld ());
11154 /* When optimizing for size emit simple rep ; movsb instruction for
11155 counts not divisible by 4. */
11157 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11159 countreg = ix86_zero_extend_to_Pmode (count_exp);
11160 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11162 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
11163 destreg, countreg));
11165 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
11166 destreg, countreg));
/* Constant aligned (or small) clears: rep stos plus tail stores.  */
11168 else if (count != 0
11170 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11171 || optimize_size || count < (unsigned int) 64))
11173 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11174 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11175 if (count & ~(size - 1))
11177 countreg = copy_to_mode_reg (counter_mode,
11178 GEN_INT ((count >> (size == 4 ? 2 : 3))
11179 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11180 countreg = ix86_zero_extend_to_Pmode (countreg);
11184 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
11185 destreg, countreg));
11187 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
11188 destreg, countreg));
11191 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
11192 destreg, countreg));
/* Store the leftover 1-7 tail bytes; narrow stores reuse ZEROREG
   through SUBREGs.  */
11194 if (size == 8 && (count & 0x04))
11195 emit_insn (gen_strsetsi (destreg,
11196 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11198 emit_insn (gen_strsethi (destreg,
11199 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11201 emit_insn (gen_strsetqi (destreg,
11202 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11208 /* Compute desired alignment of the string operation. */
11209 int desired_alignment = (TARGET_PENTIUMPRO
11210 && (count == 0 || count >= (unsigned int) 260)
11211 ? 8 : UNITS_PER_WORD);
11213 /* In case we don't know anything about the alignment, default to
11214 library version, since it is usually equally fast and result in
11217 Also emit call when we know that the count is large and call overhead
11218 will not be important. */
11219 if (!TARGET_INLINE_ALL_STRINGOPS
11220 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11223 if (TARGET_SINGLE_STRINGOP)
11224 emit_insn (gen_cld ());
11226 countreg2 = gen_reg_rtx (Pmode);
11227 countreg = copy_to_mode_reg (counter_mode, count_exp);
11228 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
/* Tiny variable counts: skip the alignment prologue entirely.  */
11230 if (count == 0 && align < desired_alignment)
11232 label = gen_label_rtx ();
11233 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11234 LEU, 0, counter_mode, 1, label);
/* Align destination: store 1, then 2, then 4 bytes as needed.  */
11238 rtx label = ix86_expand_aligntest (destreg, 1);
11239 emit_insn (gen_strsetqi (destreg,
11240 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11241 ix86_adjust_counter (countreg, 1);
11242 emit_label (label);
11243 LABEL_NUSES (label) = 1;
11247 rtx label = ix86_expand_aligntest (destreg, 2);
11248 emit_insn (gen_strsethi (destreg,
11249 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11250 ix86_adjust_counter (countreg, 2);
11251 emit_label (label);
11252 LABEL_NUSES (label) = 1;
11254 if (align <= 4 && desired_alignment > 4)
11256 rtx label = ix86_expand_aligntest (destreg, 4);
11257 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
11258 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11260 ix86_adjust_counter (countreg, 4);
11261 emit_label (label);
11262 LABEL_NUSES (label) = 1;
11265 if (label && desired_alignment > 4 && !TARGET_64BIT)
11267 emit_label (label);
11268 LABEL_NUSES (label) = 1;
11272 if (!TARGET_SINGLE_STRINGOP)
11273 emit_insn (gen_cld ());
/* Bulk clear: rep stos with the count converted to words.  */
11276 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11278 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
11279 destreg, countreg2));
11283 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
11284 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
11285 destreg, countreg2));
11289 emit_label (label);
11290 LABEL_NUSES (label) = 1;
/* Epilogue: clear the remaining tail (4/2/1 bytes), conditionally
   when the count is not known at compile time.  */
11293 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11294 emit_insn (gen_strsetsi (destreg,
11295 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11296 if (TARGET_64BIT && (align <= 4 || count == 0))
11298 rtx label = ix86_expand_aligntest (countreg, 4);
11299 emit_insn (gen_strsetsi (destreg,
11300 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11301 emit_label (label);
11302 LABEL_NUSES (label) = 1;
11304 if (align > 2 && count != 0 && (count & 2))
11305 emit_insn (gen_strsethi (destreg,
11306 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11307 if (align <= 2 || count == 0)
11309 rtx label = ix86_expand_aligntest (countreg, 2);
11310 emit_insn (gen_strsethi (destreg,
11311 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11312 emit_label (label);
11313 LABEL_NUSES (label) = 1;
11315 if (align > 1 && count != 0 && (count & 1))
11316 emit_insn (gen_strsetqi (destreg,
11317 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11318 if (align <= 1 || count == 0)
11320 rtx label = ix86_expand_aligntest (countreg, 1);
11321 emit_insn (gen_strsetqi (destreg,
11322 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11323 emit_label (label);
11324 LABEL_NUSES (label) = 1;
11329 /* Expand strlen. OUT receives the length; SRC is the string MEM;
   EOSCHAR is the terminator (const0_rtx for plain strlen); ALIGN is
   the known alignment.  (Elided listing: gaps are omitted lines.)  */
11331 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
11333 rtx addr, scratch1, scratch2, scratch3, scratch4;
11335 /* The generic case of strlen expander is long. Avoid its
11336 expansion unless TARGET_INLINE_ALL_STRINGOPS. */
11338 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11339 && !TARGET_INLINE_ALL_STRINGOPS
11341 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11344 addr = force_reg (Pmode, XEXP (src, 0));
11345 scratch1 = gen_reg_rtx (Pmode);
11347 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11350 /* Well it seems that some optimizer does not combine a call like
11351 foo(strlen(bar), strlen(bar));
11352 when the move and the subtraction is done here. It does calculate
11353 the length just once when these instructions are done inside of
11354 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
11355 often used and I use one fewer register for the lifetime of
11356 output_strlen_unroll() this is better. */
11358 emit_move_insn (out, addr);
11360 ix86_expand_strlensi_unroll_1 (out, align);
11362 /* strlensi_unroll_1 returns the address of the zero at the end of
11363 the string, like memchr(), so compute the length by subtracting
11364 the start address. */
11366 emit_insn (gen_subdi3 (out, out, addr));
11368 emit_insn (gen_subsi3 (out, out, addr));
/* Fallback: repne scasb via the strlenqi patterns.  */
11372 scratch2 = gen_reg_rtx (Pmode);
11373 scratch3 = gen_reg_rtx (Pmode);
/* scratch4 = -1 is the maximal scan count for scas.  */
11374 scratch4 = force_reg (Pmode, constm1_rtx);
11376 emit_move_insn (scratch3, addr);
11377 eoschar = force_reg (QImode, eoschar);
11379 emit_insn (gen_cld ());
11382 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
11383 align, scratch4, scratch3));
/* scas leaves -(len+2) in the count; ~x - 1 recovers the length.  */
11384 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11385 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11389 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
11390 align, scratch4, scratch3));
11391 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11392 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11398 /* Expand the appropriate insns for doing strlen if not just doing
11401 out = result, initialized with the start address
11402 align_rtx = alignment of the address.
11403 scratch = scratch register, initialized with the startaddress when
11404 not aligned, otherwise undefined
11406 This is just the body. It needs the initializations mentioned above and
11407 some address computing at the end. These things are done in i386.md.
   (Elided listing: gaps in the numbering are omitted lines.)  */
11410 ix86_expand_strlensi_unroll_1 (rtx out, rtx align_rtx)
11414 rtx align_2_label = NULL_RTX;
11415 rtx align_3_label = NULL_RTX;
11416 rtx align_4_label = gen_label_rtx ();
11417 rtx end_0_label = gen_label_rtx ();
11419 rtx tmpreg = gen_reg_rtx (SImode);
11420 rtx scratch = gen_reg_rtx (SImode);
11424 if (GET_CODE (align_rtx) == CONST_INT)
11425 align = INTVAL (align_rtx);
11427 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11429 /* Is there a known alignment and is it less than 4? */
11432 rtx scratch1 = gen_reg_rtx (Pmode);
11433 emit_move_insn (scratch1, out);
11434 /* Is there a known alignment and is it not 2? */
11437 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11438 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11440 /* Leave just the 3 lower bits. */
11441 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11442 NULL_RTX, 0, OPTAB_WIDEN);
/* Dispatch on (addr & 3): 0 -> aligned, 2 -> 2 mod 4, >2 -> 3 mod 4.  */
11444 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11445 Pmode, 1, align_4_label);
11446 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
11447 Pmode, 1, align_2_label);
11448 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
11449 Pmode, 1, align_3_label);
11453 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11454 check if is aligned to 4 - byte. */
11456 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
11457 NULL_RTX, 0, OPTAB_WIDEN);
11459 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11460 Pmode, 1, align_4_label);
11463 mem = gen_rtx_MEM (QImode, out);
11465 /* Now compare the bytes. */
11467 /* Compare the first n unaligned byte on a byte per byte basis. */
11468 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11469 QImode, 1, end_0_label);
11471 /* Increment the address. */
11473 emit_insn (gen_adddi3 (out, out, const1_rtx));
11475 emit_insn (gen_addsi3 (out, out, const1_rtx));
11477 /* Not needed with an alignment of 2 */
11480 emit_label (align_2_label);
11482 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11486 emit_insn (gen_adddi3 (out, out, const1_rtx));
11488 emit_insn (gen_addsi3 (out, out, const1_rtx));
11490 emit_label (align_3_label);
11493 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11497 emit_insn (gen_adddi3 (out, out, const1_rtx));
11499 emit_insn (gen_addsi3 (out, out, const1_rtx));
11502 /* Generate loop to check 4 bytes at a time. It is not a good idea to
11503 align this loop. It gives only huge programs, but does not help to
11505 emit_label (align_4_label);
11507 mem = gen_rtx_MEM (SImode, out);
11508 emit_move_insn (scratch, mem);
11510 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11512 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11514 /* This formula yields a nonzero result iff one of the bytes is zero.
11515 This saves three branches inside loop and many cycles. */
/* Classic "has-zero-byte" trick: (x - 0x01010101) & ~x & 0x80808080.  */
11517 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11518 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11519 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11520 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11521 gen_int_mode (0x80808080, SImode)));
11522 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
/* Branch-free epilogue (cmove): locate the zero byte within the word.  */
11527 rtx reg = gen_reg_rtx (SImode);
11528 rtx reg2 = gen_reg_rtx (Pmode);
11529 emit_move_insn (reg, tmpreg);
11530 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11532 /* If zero is not in the first two bytes, move two bytes forward. */
11533 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11534 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11535 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11536 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11537 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11540 /* Emit lea manually to avoid clobbering of flags. */
11541 emit_insn (gen_rtx_SET (SImode, reg2,
11542 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
11544 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11545 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11546 emit_insn (gen_rtx_SET (VOIDmode, out,
11547 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
/* Branching epilogue for targets without cmove.  */
11554 rtx end_2_label = gen_label_rtx ();
11555 /* Is zero in the first two bytes? */
11557 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11558 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11559 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11560 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11561 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11563 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11564 JUMP_LABEL (tmp) = end_2_label;
11566 /* Not in the first two. Move two bytes forward. */
11567 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11569 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
11571 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
11573 emit_label (end_2_label);
11577 /* Avoid branch in fixing the byte. */
/* add tmpreg,tmpreg sets carry from the zero-marker bit; a carried
   subtract then corrects OUT by 3 or 2 bytes without a branch.  */
11578 tmpreg = gen_lowpart (QImode, tmpreg);
11579 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11580 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11582 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11584 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
11586 emit_label (end_0_label);
/* Emit a call insn.  RETVAL is the value register or NULL; FNADDR is a
   MEM holding the callee address; CALLARG1/CALLARG2 are auxiliary call
   arguments; POP is the byte count the callee pops (or const0_rtx);
   SIBCALL is nonzero for a tail call.  (Elided listing: gaps in the
   numbering are omitted lines.)  */
11590 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, rtx callarg2,
11591 rtx pop, int sibcall)
11593 rtx use = NULL, call;
11595 if (pop == const0_rtx)
11597 if (TARGET_64BIT && pop)
/* Darwin (TARGET_MACHO) redirection of PIC calls through a stub.  */
11601 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11602 fnaddr = machopic_indirect_call_target (fnaddr);
11604 /* Static functions and indirect calls don't need the pic register. */
11605 if (! TARGET_64BIT && flag_pic
11606 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11607 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
11608 use_reg (&use, pic_offset_table_rtx)
/* x86-64 varargs ABI: AL carries the number of SSE registers used.  */
11610 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11612 rtx al = gen_rtx_REG (QImode, 0);
11613 emit_move_insn (al, callarg2);
11614 use_reg (&use, al);
11616 #endif /* TARGET_MACHO */
11618 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11620 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11621 fnaddr = gen_rtx_MEM (QImode, fnaddr);
/* 64-bit sibcalls to a non-constant address must go through R11,
   which is neither callee-saved nor an argument register.  */
11623 if (sibcall && TARGET_64BIT
11624 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11627 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11628 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
11629 emit_move_insn (fnaddr, addr);
11630 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11633 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11635 call = gen_rtx_SET (VOIDmode, retval, call);
/* Callee-pop: wrap the call with a stack-pointer adjustment.  */
11638 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11639 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11640 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11643 call = emit_call_insn (call);
11645 CALL_INSN_FUNCTION_USAGE (call) = use;
11649 /* Clear stack slot assignments remembered from previous functions.
11650 This is called from INIT_EXPANDERS once before RTL is emitted for each
11653 static struct machine_function *
11654 ix86_init_machine_status (void)
11656 struct machine_function *f;
/* GC-managed, zero-initialized allocation.  */
11658 f = ggc_alloc_cleared (sizeof (struct machine_function));
/* -1 flags the cached prologue/epilogue register count as not yet
   computed — presumably recomputed lazily elsewhere; TODO confirm.  */
11659 f->use_fast_prologue_epilogue_nregs = -1;
11664 /* Return a MEM corresponding to a stack slot with mode MODE.
11665 Allocate a new slot if necessary.
11667 The RTL for a function can have several slots available: N is
11668 which slot to use. */
11671 assign_386_stack_local (enum machine_mode mode, int n)
11673 struct stack_local_entry *s;
/* Reject out-of-range slot numbers.  */
11675 if (n < 0 || n >= MAX_386_STACK_LOCALS)
/* Reuse a previously assigned slot with the same mode and index.  */
11678 for (s = ix86_stack_locals; s; s = s->next)
11679 if (s->mode == mode && s->n == n)
/* Not found: allocate a fresh entry and its stack slot, then link it
   at the head of the per-function list.  */
11682 s = (struct stack_local_entry *)
11683 ggc_alloc (sizeof (struct stack_local_entry));
11686 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11688 s->next = ix86_stack_locals;
11689 ix86_stack_locals = s;
11693 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11695 static GTY(()) rtx ix86_tls_symbol;
11697 ix86_tls_get_addr (void)
/* Lazily create and cache the symbol; 32-bit GNU TLS uses the
   triple-underscore entry point, everything else the standard name.  */
11700 if (!ix86_tls_symbol)
11702 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11703 (TARGET_GNU_TLS && !TARGET_64BIT)
11704 ? "___tls_get_addr"
11705 : "__tls_get_addr");
11708 return ix86_tls_symbol;
11711 /* Calculate the length of the memory address in the instruction
11712 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11715 memory_address_length (rtx addr)
11717 struct ix86_address parts;
11718 rtx base, index, disp;
/* Auto-modify addresses (push/pop style) have no extra address bytes.  */
11721 if (GET_CODE (addr) == PRE_DEC
11722 || GET_CODE (addr) == POST_INC
11723 || GET_CODE (addr) == PRE_MODIFY
11724 || GET_CODE (addr) == POST_MODIFY)
11727 if (! ix86_decompose_address (addr, &parts))
11731 index = parts.index;
11736 - esp as the base always wants an index,
11737 - ebp as the base always wants a displacement. */
11739 /* Register Indirect. */
11740 if (base && !index && !disp)
11742 /* esp (for its index) and ebp (for its displacement) need
11743 the two-byte modrm form. */
11744 if (addr == stack_pointer_rtx
11745 || addr == arg_pointer_rtx
11746 || addr == frame_pointer_rtx
11747 || addr == hard_frame_pointer_rtx)
11751 /* Direct Addressing. */
11752 else if (disp && !base && !index)
11757 /* Find the length of the displacement constant. */
/* 'K' accepts displacements that fit the short (sign-extended 8-bit)
   encoding — presumably; TODO confirm against the constraint table.  */
11760 if (GET_CODE (disp) == CONST_INT
11761 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
11767 /* ebp always wants a displacement. */
11768 else if (base == hard_frame_pointer_rtx)
11771 /* An index requires the two-byte modrm form... */
11773 /* ...like esp, which always wants an index. */
11774 || base == stack_pointer_rtx
11775 || base == arg_pointer_rtx
11776 || base == frame_pointer_rtx)
11783 /* Compute default value for "length_immediate" attribute. When SHORTFORM
11784 is set, expect that insn have 8bit immediate alternative. */
11786 ix86_attr_length_immediate_default (rtx insn, int shortform)
/* Scan the insn's operands for constants and size their encoding.  */
11790 extract_insn_cached (insn);
11791 for (i = recog_data.n_operands - 1; i >= 0; --i)
11792 if (CONSTANT_P (recog_data.operand[i]))
/* A CONST_INT satisfying 'K' fits the short immediate form.  */
11797 && GET_CODE (recog_data.operand[i]) == CONST_INT
11798 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
/* Otherwise the immediate width follows the insn's mode attribute.  */
11802 switch (get_attr_mode (insn))
11813 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
11818 fatal_insn ("unknown insn mode", insn);
11824 /* Compute default value for "length_address" attribute. */
11826 ix86_attr_length_address_default (rtx insn)
/* LEA's "address" is its SET_SRC; dig it out of the pattern, which may
   be wrapped in a PARALLEL.  */
11830 if (get_attr_type (insn) == TYPE_LEA)
11832 rtx set = PATTERN (insn);
11833 if (GET_CODE (set) == SET)
11835 else if (GET_CODE (set) == PARALLEL
11836 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
11837 set = XVECEXP (set, 0, 0);
11840 #ifdef ENABLE_CHECKING
11846 return memory_address_length (SET_SRC (set));
/* Otherwise size the first MEM operand found (if any).  */
11849 extract_insn_cached (insn);
11850 for (i = recog_data.n_operands - 1; i >= 0; --i)
11851 if (GET_CODE (recog_data.operand[i]) == MEM)
11853 return memory_address_length (XEXP (recog_data.operand[i], 0));
11859 /* Return the maximum number of instructions a cpu can issue. */
11862 ix86_issue_rate (void)
/* Per-processor issue widths; the returned constants are in the
   excerpt's missing lines.  */
11866 case PROCESSOR_PENTIUM:
11870 case PROCESSOR_PENTIUMPRO:
11871 case PROCESSOR_PENTIUM4:
11872 case PROCESSOR_ATHLON:
11881 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
11882 by DEP_INSN and nothing set by DEP_INSN. */
11885 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
11889 /* Simplify the test for uninteresting insns. */
11890 if (insn_type != TYPE_SETCC
11891 && insn_type != TYPE_ICMOV
11892 && insn_type != TYPE_FCMOV
11893 && insn_type != TYPE_IBR)
11896 if ((set = single_set (dep_insn)) != 0)
11898 set = SET_DEST (set);
11901 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
11902 && XVECLEN (PATTERN (dep_insn), 0) == 2
11903 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
11904 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
11906 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11907 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11912 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11915 /* This test is true if the dependent insn reads the flags but
11916 not any other potentially set register. */
11917 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11920 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11926 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11927 address with operands set by DEP_INSN. */
11930 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
/* For LEA, the "address" is the SET_SRC of its pattern (possibly inside
   a PARALLEL).  NOTE(review): additional guard conditions appear to be
   missing from this excerpt.  */
11934 if (insn_type == TYPE_LEA
11937 addr = PATTERN (insn);
11938 if (GET_CODE (addr) == SET)
11940 else if (GET_CODE (addr) == PARALLEL
11941 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11942 addr = XVECEXP (addr, 0, 0);
11945 addr = SET_SRC (addr);
/* Otherwise find the first MEM operand and use its address.  */
11950 extract_insn_cached (insn);
11951 for (i = recog_data.n_operands - 1; i >= 0; --i)
11952 if (GET_CODE (recog_data.operand[i]) == MEM)
11954 addr = XEXP (recog_data.operand[i], 0);
/* True iff DEP_INSN writes anything the address expression reads.  */
11961 return modified_in_p (addr, dep_insn);
/* Scheduler hook: adjust the latency COST of the dependence LINK between
   DEP_INSN (producer) and INSN (consumer) for the tuned processor.
   NOTE(review): several case labels and cost assignments are missing
   from this excerpt; comments cover visible code only.  */
11965 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
11967 enum attr_type insn_type, dep_insn_type;
11968 enum attr_memory memory, dep_memory;
11970 int dep_insn_code_number;
11972 /* Anti and output dependencies have zero cost on all CPUs. */
11973 if (REG_NOTE_KIND (link) != 0)
11976 dep_insn_code_number = recog_memoized (dep_insn);
11978 /* If we can't recognize the insns, we can't really do anything. */
11979 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
11982 insn_type = get_attr_type (insn);
11983 dep_insn_type = get_attr_type (dep_insn);
11987 case PROCESSOR_PENTIUM:
11988 /* Address Generation Interlock adds a cycle of latency. */
11989 if (ix86_agi_dependant (insn, dep_insn, insn_type))
11992 /* ??? Compares pair with jump/setcc. */
11993 if (ix86_flags_dependant (insn, dep_insn, insn_type))
11996 /* Floating point stores require value to be ready one cycle earlier. */
11997 if (insn_type == TYPE_FMOV
11998 && get_attr_memory (insn) == MEMORY_STORE
11999 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12003 case PROCESSOR_PENTIUMPRO:
12004 memory = get_attr_memory (insn);
12005 dep_memory = get_attr_memory (dep_insn);
12007 /* Since we can't represent delayed latencies of load+operation,
12008 increase the cost here for non-imov insns. */
12009 if (dep_insn_type != TYPE_IMOV
12010 && dep_insn_type != TYPE_FMOV
12011 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
12014 /* INT->FP conversion is expensive. */
12015 if (get_attr_fp_int_src (dep_insn))
12018 /* There is one cycle extra latency between an FP op and a store. */
12019 if (insn_type == TYPE_FMOV
12020 && (set = single_set (dep_insn)) != NULL_RTX
12021 && (set2 = single_set (insn)) != NULL_RTX
12022 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
12023 && GET_CODE (SET_DEST (set2)) == MEM)
12026 /* Show ability of reorder buffer to hide latency of load by executing
12027 in parallel with previous instruction in case
12028 previous instruction is not needed to compute the address. */
12029 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12030 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12032 /* Claim moves to take one cycle, as core can issue one load
12033 at time and the next load can start cycle later. */
12034 if (dep_insn_type == TYPE_IMOV
12035 || dep_insn_type == TYPE_FMOV)
/* NOTE(review): the case label for the following branch is missing
   from this excerpt (presumably PROCESSOR_K6 — confirm upstream).  */
12043 memory = get_attr_memory (insn);
12044 dep_memory = get_attr_memory (dep_insn);
12045 /* The esp dependency is resolved before the instruction is really
12047 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
12048 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
12051 /* Since we can't represent delayed latencies of load+operation,
12052 increase the cost here for non-imov insns. */
12053 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
12054 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
12056 /* INT->FP conversion is expensive. */
12057 if (get_attr_fp_int_src (dep_insn))
12060 /* Show ability of reorder buffer to hide latency of load by executing
12061 in parallel with previous instruction in case
12062 previous instruction is not needed to compute the address. */
12063 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12064 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12066 /* Claim moves to take one cycle, as core can issue one load
12067 at time and the next load can start cycle later. */
12068 if (dep_insn_type == TYPE_IMOV
12069 || dep_insn_type == TYPE_FMOV)
12078 case PROCESSOR_ATHLON:
12080 memory = get_attr_memory (insn);
12081 dep_memory = get_attr_memory (dep_insn);
12083 /* Show ability of reorder buffer to hide latency of load by executing
12084 in parallel with previous instruction in case
12085 previous instruction is not needed to compute the address. */
12086 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12087 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12089 enum attr_unit unit = get_attr_unit (insn);
12092 /* Because of the difference between the length of integer and
12093 floating unit pipeline preparation stages, the memory operands
12094 for floating point are cheaper.
12096 ??? For Athlon the difference is most probably 2. */
12097 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
12100 loadcost = TARGET_ATHLON ? 2 : 0;
/* Deduct the hidden load latency, clamping at zero (clamp branch is in
   the excerpt's missing lines).  */
12102 if (cost >= loadcost)
12117 struct ppro_sched_data
12120 int issued_this_cycle;
/* Return INSN's ppro_uops attribute, or PPRO_UOPS_MANY (the most
   conservative decoder class) when the insn is not recognizable.  */
12124 static enum attr_ppro_uops
12125 ix86_safe_ppro_uops (rtx insn)
12127 if (recog_memoized (insn) >= 0)
12128 return get_attr_ppro_uops (insn);
12130 return PPRO_UOPS_MANY;
/* Debug helper: print the INSN_UIDs currently occupying the three PPro
   decoder slots to DUMP, if slot 0 is occupied.  */
12134 ix86_dump_ppro_packet (FILE *dump)
12136 if (ix86_sched_data.ppro.decode[0])
12138 fprintf (dump, "PPRO packet: %d",
12139 INSN_UID (ix86_sched_data.ppro.decode[0]));
12140 if (ix86_sched_data.ppro.decode[1])
12141 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
12142 if (ix86_sched_data.ppro.decode[2])
12143 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
12144 fputc ('\n', dump);
12148 /* We're beginning a new block. Initialize data structures as necessary. */
12151 ix86_sched_init (FILE *dump ATTRIBUTE_UNUSED,
12152 int sched_verbose ATTRIBUTE_UNUSED,
12153 int veclen ATTRIBUTE_UNUSED)
/* Zero the whole per-block scheduling state.  */
12155 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
12158 /* Shift INSN to SLOT, and shift everything else down. */
12161 ix86_reorder_insn (rtx *insnp, rtx *slot)
/* Slide each following entry down by one until SLOT is reached.  */
12167 insnp[0] = insnp[1];
12168 while (++insnp != slot);
/* Reorder the ready queue (READY .. E_READY, E_READY = highest priority)
   to model the PPro 4-1-1 decoder template: one complex insn plus up to
   two single-uop insns per cycle.  Records the result in
   ix86_sched_data.ppro.  */
12174 ix86_sched_reorder_ppro (rtx *ready, rtx *e_ready)
12177 enum attr_ppro_uops cur_uops;
12178 int issued_this_cycle;
12182 /* At this point .ppro.decode contains the state of the three
12183 decoders from last "cycle". That is, those insns that were
12184 actually independent. But here we're scheduling for the
12185 decoder, and we may find things that are decodable in the
12188 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
12189 issued_this_cycle = 0;
12192 cur_uops = ix86_safe_ppro_uops (*insnp);
12194 /* If the decoders are empty, and we've a complex insn at the
12195 head of the priority queue, let it issue without complaint. */
12196 if (decode[0] == NULL)
12198 if (cur_uops == PPRO_UOPS_MANY)
12200 decode[0] = *insnp;
12204 /* Otherwise, search for a 2-4 uop insn to issue. */
12205 while (cur_uops != PPRO_UOPS_FEW)
12207 if (insnp == ready)
12209 cur_uops = ix86_safe_ppro_uops (*--insnp);
12212 /* If so, move it to the head of the line. */
12213 if (cur_uops == PPRO_UOPS_FEW)
12214 ix86_reorder_insn (insnp, e_ready);
12216 /* Issue the head of the queue. */
12217 issued_this_cycle = 1;
12218 decode[0] = *e_ready--;
12221 /* Look for simple insns to fill in the other two slots. */
12222 for (i = 1; i < 3; ++i)
12223 if (decode[i] == NULL)
12225 if (ready > e_ready)
12229 cur_uops = ix86_safe_ppro_uops (*insnp);
12230 while (cur_uops != PPRO_UOPS_ONE)
12232 if (insnp == ready)
12234 cur_uops = ix86_safe_ppro_uops (*--insnp);
12237 /* Found one. Move it to the head of the queue and issue it. */
12238 if (cur_uops == PPRO_UOPS_ONE)
12240 ix86_reorder_insn (insnp, e_ready);
12241 decode[i] = *e_ready--;
12242 issued_this_cycle++;
12246 /* ??? Didn't find one. Ideally, here we would do a lazy split
12247 of 2-uop insns, issue one and queue the other. */
/* Always report at least one issue so the scheduler makes progress.  */
12251 if (issued_this_cycle == 0)
12252 issued_this_cycle = 1;
12253 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
12256 /* We are about to begin issuing insns for this clock cycle.
12257 Override the default sort algorithm to better slot instructions. */
12259 ix86_sched_reorder (FILE *dump ATTRIBUTE_UNUSED,
12260 int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
12261 int *n_readyp, int clock_var ATTRIBUTE_UNUSED)
12263 int n_ready = *n_readyp;
12264 rtx *e_ready = ready + n_ready - 1;
12266 /* Make sure to go ahead and initialize key items in
12267 ix86_sched_data if we are not going to bother trying to
12268 reorder the ready queue. */
12271 ix86_sched_data.ppro.issued_this_cycle = 1;
/* Only PPro gets a custom reordering; the dispatch switch's other
   labels are in the excerpt's missing lines.  */
12280 case PROCESSOR_PENTIUMPRO:
12281 ix86_sched_reorder_ppro (ready, e_ready);
12286 return ix86_issue_rate ();
12289 /* We are about to issue INSN. Return the number of insns left on the
12290 ready queue that can be issued this cycle. */
12293 ix86_variable_issue (FILE *dump, int sched_verbose, rtx insn,
12294 int can_issue_more)
12300 return can_issue_more - 1;
12302 case PROCESSOR_PENTIUMPRO:
12304 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
/* A many-uop insn occupies the whole decode group: flush the current
   packet to the dump and start (or clear) the group with this insn.  */
12306 if (uops == PPRO_UOPS_MANY)
12309 ix86_dump_ppro_packet (dump);
12310 ix86_sched_data.ppro.decode[0] = insn;
12311 ix86_sched_data.ppro.decode[1] = NULL;
12312 ix86_sched_data.ppro.decode[2] = NULL;
12314 ix86_dump_ppro_packet (dump);
12315 ix86_sched_data.ppro.decode[0] = NULL;
/* A 2-4 uop insn must go in decoder slot 0.  */
12317 else if (uops == PPRO_UOPS_FEW)
12320 ix86_dump_ppro_packet (dump);
12321 ix86_sched_data.ppro.decode[0] = insn;
12322 ix86_sched_data.ppro.decode[1] = NULL;
12323 ix86_sched_data.ppro.decode[2] = NULL;
/* Single-uop insn: take the first free decoder slot; when all three
   fill up, flush the packet and reset.  */
12327 for (i = 0; i < 3; ++i)
12328 if (ix86_sched_data.ppro.decode[i] == NULL)
12330 ix86_sched_data.ppro.decode[i] = insn;
12338 ix86_dump_ppro_packet (dump);
12339 ix86_sched_data.ppro.decode[0] = NULL;
12340 ix86_sched_data.ppro.decode[1] = NULL;
12341 ix86_sched_data.ppro.decode[2] = NULL;
12345 return --ix86_sched_data.ppro.issued_this_cycle;
/* Scheduler hook: nonzero when the processor has a DFA pipeline
   description (here: Pentium and Athlon/K8).  */
12350 ia32_use_dfa_pipeline_interface (void)
12352 if (TARGET_PENTIUM || TARGET_ATHLON_K8)
12357 /* How many alternative schedules to try. This should be as wide as the
12358 scheduling freedom in the DFA, but no wider. Making this value too
12359 large results in extra work for the scheduler. */
12362 ia32_multipass_dfa_lookahead (void)
/* Only Pentium gets a nonzero lookahead; return values are in the
   excerpt's missing lines.  */
12364 if (ix86_tune == PROCESSOR_PENTIUM)
12371 /* Walk through INSNS and look for MEM references whose address is DSTREG or
12372 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
12376 ix86_set_move_mem_attrs (rtx insns, rtx dstref, rtx srcref, rtx dstreg,
/* Process every insn in the sequence; the recursion over each pattern
   is done by the _1 helper.  */
12381 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
12383 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
12387 /* Subroutine of above to actually do the updating by recursively walking
12391 ix86_set_move_mem_attrs_1 (rtx x, rtx dstref, rtx srcref, rtx dstreg,
12394 enum rtx_code code = GET_CODE (x);
12395 const char *format_ptr = GET_RTX_FORMAT (code);
/* Copy attributes onto MEMs addressed directly by DSTREG/SRCREG.  */
12398 if (code == MEM && XEXP (x, 0) == dstreg)
12399 MEM_COPY_ATTRIBUTES (x, dstref);
12400 else if (code == MEM && XEXP (x, 0) == srcreg)
12401 MEM_COPY_ATTRIBUTES (x, srcref);
/* Recurse into sub-expressions ('e') and vectors of them ('E').  */
12403 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
12405 if (*format_ptr == 'e')
12406 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
12408 else if (*format_ptr == 'E')
12409 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12410 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
12415 /* Compute the alignment given to a constant that is being placed in memory.
12416 EXP is the constant and ALIGN is the alignment that the object would
12418 The value of this function is used instead of that alignment to align
12422 ix86_constant_alignment (tree exp, int align)
/* Widen alignment of FP constants (doubles to 64 bits, 128-bit modes
   to 128) and of long string constants.  */
12424 if (TREE_CODE (exp) == REAL_CST)
12426 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12428 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12431 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
12438 /* Compute the alignment for a static variable.
12439 TYPE is the data type, and ALIGN is the alignment that
12440 the object would ordinarily have. The value of this function is used
12441 instead of that alignment to align the object. */
12444 ix86_data_alignment (tree type, int align)
/* Large aggregates (>= 256 bits) get 256-bit alignment.  */
12446 if (AGGREGATE_TYPE_P (type)
12447 && TYPE_SIZE (type)
12448 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12449 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12450 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12453 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12454 to 16byte boundary. */
12457 if (AGGREGATE_TYPE_P (type)
12458 && TYPE_SIZE (type)
12459 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12460 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12461 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Widen alignment based on element/field mode: 64-bit doubles, 128-bit
   vector/FP modes.  */
12465 if (TREE_CODE (type) == ARRAY_TYPE)
12467 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12469 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12472 else if (TREE_CODE (type) == COMPLEX_TYPE)
12475 if (TYPE_MODE (type) == DCmode && align < 64)
12477 if (TYPE_MODE (type) == XCmode && align < 128)
12480 else if ((TREE_CODE (type) == RECORD_TYPE
12481 || TREE_CODE (type) == UNION_TYPE
12482 || TREE_CODE (type) == QUAL_UNION_TYPE)
12483 && TYPE_FIELDS (type))
12485 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12487 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12490 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12491 || TREE_CODE (type) == INTEGER_TYPE)
12493 if (TYPE_MODE (type) == DFmode && align < 64)
12495 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12502 /* Compute the alignment for a local variable.
12503 TYPE is the data type, and ALIGN is the alignment that
12504 the object would ordinarily have. The value of this macro is used
12505 instead of that alignment to align the object. */
12508 ix86_local_alignment (tree type, int align)
12510 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12511 to 16byte boundary. */
12514 if (AGGREGATE_TYPE_P (type)
12515 && TYPE_SIZE (type)
12516 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12517 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12518 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Same mode-based widening as ix86_data_alignment: 64-bit doubles and
   128-bit vector/FP modes.  */
12521 if (TREE_CODE (type) == ARRAY_TYPE)
12523 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12525 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12528 else if (TREE_CODE (type) == COMPLEX_TYPE)
12530 if (TYPE_MODE (type) == DCmode && align < 64)
12532 if (TYPE_MODE (type) == XCmode && align < 128)
12535 else if ((TREE_CODE (type) == RECORD_TYPE
12536 || TREE_CODE (type) == UNION_TYPE
12537 || TREE_CODE (type) == QUAL_UNION_TYPE)
12538 && TYPE_FIELDS (type))
12540 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12542 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12545 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12546 || TREE_CODE (type) == INTEGER_TYPE)
12549 if (TYPE_MODE (type) == DFmode && align < 64)
12551 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12557 /* Emit RTL insns to initialize the variable parts of a trampoline.
12558 FNADDR is an RTX for the address of the function's pure code.
12559 CXT is an RTX for the static chain value for the function. */
12561 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
12565 /* Compute offset from the end of the jmp to the target function. */
12566 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12567 plus_constant (tramp, 10),
12568 NULL_RTX, 1, OPTAB_DIRECT);
/* 32-bit trampoline: "movl $cxt, %ecx" (0xb9 imm32) followed by
   "jmp rel32" (0xe9 disp32).  */
12569 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12570 gen_int_mode (0xb9, QImode));
12571 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12572 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12573 gen_int_mode (0xe9, QImode));
12574 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12579 /* Try to load address using shorter movl instead of movabs.
12580 We may want to support movq for kernel mode, but kernel does not use
12581 trampolines at the moment. */
/* 64-bit, zero-extended target: "movl $imm32, %r11d" (41 bb, stored
   little-endian as 0xbb41).  */
12582 if (x86_64_zero_extended_value (fnaddr))
12584 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12585 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12586 gen_int_mode (0xbb41, HImode));
12587 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12588 gen_lowpart (SImode, fnaddr));
/* Otherwise "movabs $imm64, %r11" (49 bb).  */
12593 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12594 gen_int_mode (0xbb49, HImode));
12595 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12599 /* Load static chain using movabs to r10. */
12600 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12601 gen_int_mode (0xba49, HImode));
12602 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12605 /* Jump to the r11: "jmp *%r11" (49 ff e3). */
12606 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12607 gen_int_mode (0xff49, HImode));
12608 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12609 gen_int_mode (0xe3, QImode));
/* Sanity check against the declared trampoline size.  */
12611 if (offset > TRAMPOLINE_SIZE)
12615 #ifdef TRANSFER_FROM_TRAMPOLINE
/* Some targets must mark the trampoline's stack page executable.  */
12616 emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
12617 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
/* Register builtin NAME (of tree type TYPE, enum value CODE) iff the
   ISA bits in MASK are enabled in target_flags, and — when MASK_64BIT
   is requested — only on 64-bit targets.  */
12621 #define def_builtin(MASK, NAME, TYPE, CODE) \
12623 if ((MASK) & target_flags \
12624 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
12625 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12626 NULL, NULL_TREE); \
/* One row of the builtin tables below: ISA mask, insn pattern, builtin
   name/enum, the RTL comparison to use, and a flag (nonzero means the
   comparison's operands are swapped when expanding).  */
12629 struct builtin_description
12631 const unsigned int mask;
12632 const enum insn_code icode;
12633 const char *const name;
12634 const enum ix86_builtins code;
12635 const enum rtx_code comparison;
12636 const unsigned int flag;
/* comiss/ucomiss (SSE) and comisd/ucomisd (SSE2) scalar compare
   builtins.  The unordered-signalling codes (UNEQ/UNLT/UNLE/LTGT) and
   plain GT/GE encode how each predicate maps onto the comi result.  */
12639 static const struct builtin_description bdesc_comi[] =
12641 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12642 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12643 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12644 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12645 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12646 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12647 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12648 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12649 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12650 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12651 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12652 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12653 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12654 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12655 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12656 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12657 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12658 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12659 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12660 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12661 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12662 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12663 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12664 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12667 static const struct builtin_description bdesc_2arg[] =
12670 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12671 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12672 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12673 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12674 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12675 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12676 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12677 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12679 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12680 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12681 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12682 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12683 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12684 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12685 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12686 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12687 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12688 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12689 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12690 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12691 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12692 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12693 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12694 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12695 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12696 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12697 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12698 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12700 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12701 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12702 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12703 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12705 { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12706 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12707 { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12708 { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12710 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12711 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12712 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12713 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12714 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12717 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12718 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12719 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12720 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
12721 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12722 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12723 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12724 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
12726 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12727 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12728 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12729 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12730 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12731 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12732 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12733 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12735 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12736 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12737 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12739 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12740 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12741 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12742 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12744 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12745 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12747 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12748 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12749 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12750 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12751 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12752 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12754 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12755 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12756 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12757 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12759 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12760 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12761 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12762 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12763 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12764 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12767 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12768 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12769 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12771 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12772 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12773 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
12775 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12776 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12777 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12778 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12779 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12780 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12782 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12783 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12784 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12785 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12786 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12787 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12789 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12790 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12791 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12792 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12794 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12795 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12798 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12799 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12800 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12801 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12802 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12803 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12804 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12805 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12807 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12808 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12809 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12810 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12811 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12812 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12813 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12814 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12815 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12816 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12817 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12818 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12819 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12820 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12821 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12822 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12823 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12824 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12825 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12826 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12828 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12829 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12830 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12831 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12833 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12834 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12835 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12836 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12838 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12839 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12840 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12843 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12844 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12845 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12846 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12847 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12848 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12849 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12850 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12852 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12853 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12854 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12855 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12856 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12857 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12858 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12859 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12861 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12862 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12863 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
12864 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
12866 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12867 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12868 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12869 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
12871 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12872 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12874 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12875 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12876 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12877 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12878 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12879 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12881 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12882 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12883 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12884 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12886 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12887 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12888 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
12889 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
12890 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12891 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12892 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12893 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
12895 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12896 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12897 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12899 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12900 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12902 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12903 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12904 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12905 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12906 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12907 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12909 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12910 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12911 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12912 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12913 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12914 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12916 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12917 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12918 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12919 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12921 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12923 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12924 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
12925 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12926 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
12929 { MASK_PNI, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
12930 { MASK_PNI, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
12931 { MASK_PNI, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
12932 { MASK_PNI, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
12933 { MASK_PNI, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
12934 { MASK_PNI, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
12937 static const struct builtin_description bdesc_1arg[] =
12939 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12940 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12942 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12943 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12944 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12946 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12947 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12948 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
12949 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12950 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12951 { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
12953 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12954 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12955 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
12956 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
12958 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12960 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12961 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
12963 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12964 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12965 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12966 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12967 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
12969 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
12971 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12972 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12973 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
12974 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
12976 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12977 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
12978 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
12980 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
12983 { MASK_PNI, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
12984 { MASK_PNI, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
12985 { MASK_PNI, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
12989 ix86_init_builtins (void)
12992 ix86_init_mmx_sse_builtins ();
12995 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
12996 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
12999 ix86_init_mmx_sse_builtins (void)
13001 const struct builtin_description * d;
13004 tree pchar_type_node = build_pointer_type (char_type_node);
13005 tree pcchar_type_node = build_pointer_type (
13006 build_type_variant (char_type_node, 1, 0));
13007 tree pfloat_type_node = build_pointer_type (float_type_node);
13008 tree pcfloat_type_node = build_pointer_type (
13009 build_type_variant (float_type_node, 1, 0));
13010 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
13011 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
13012 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
13015 tree int_ftype_v4sf_v4sf
13016 = build_function_type_list (integer_type_node,
13017 V4SF_type_node, V4SF_type_node, NULL_TREE);
13018 tree v4si_ftype_v4sf_v4sf
13019 = build_function_type_list (V4SI_type_node,
13020 V4SF_type_node, V4SF_type_node, NULL_TREE);
13021 /* MMX/SSE/integer conversions. */
13022 tree int_ftype_v4sf
13023 = build_function_type_list (integer_type_node,
13024 V4SF_type_node, NULL_TREE);
13025 tree int64_ftype_v4sf
13026 = build_function_type_list (long_long_integer_type_node,
13027 V4SF_type_node, NULL_TREE);
13028 tree int_ftype_v8qi
13029 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
13030 tree v4sf_ftype_v4sf_int
13031 = build_function_type_list (V4SF_type_node,
13032 V4SF_type_node, integer_type_node, NULL_TREE);
13033 tree v4sf_ftype_v4sf_int64
13034 = build_function_type_list (V4SF_type_node,
13035 V4SF_type_node, long_long_integer_type_node,
13037 tree v4sf_ftype_v4sf_v2si
13038 = build_function_type_list (V4SF_type_node,
13039 V4SF_type_node, V2SI_type_node, NULL_TREE);
13040 tree int_ftype_v4hi_int
13041 = build_function_type_list (integer_type_node,
13042 V4HI_type_node, integer_type_node, NULL_TREE);
13043 tree v4hi_ftype_v4hi_int_int
13044 = build_function_type_list (V4HI_type_node, V4HI_type_node,
13045 integer_type_node, integer_type_node,
13047 /* Miscellaneous. */
13048 tree v8qi_ftype_v4hi_v4hi
13049 = build_function_type_list (V8QI_type_node,
13050 V4HI_type_node, V4HI_type_node, NULL_TREE);
13051 tree v4hi_ftype_v2si_v2si
13052 = build_function_type_list (V4HI_type_node,
13053 V2SI_type_node, V2SI_type_node, NULL_TREE);
13054 tree v4sf_ftype_v4sf_v4sf_int
13055 = build_function_type_list (V4SF_type_node,
13056 V4SF_type_node, V4SF_type_node,
13057 integer_type_node, NULL_TREE);
13058 tree v2si_ftype_v4hi_v4hi
13059 = build_function_type_list (V2SI_type_node,
13060 V4HI_type_node, V4HI_type_node, NULL_TREE);
13061 tree v4hi_ftype_v4hi_int
13062 = build_function_type_list (V4HI_type_node,
13063 V4HI_type_node, integer_type_node, NULL_TREE);
13064 tree v4hi_ftype_v4hi_di
13065 = build_function_type_list (V4HI_type_node,
13066 V4HI_type_node, long_long_unsigned_type_node,
13068 tree v2si_ftype_v2si_di
13069 = build_function_type_list (V2SI_type_node,
13070 V2SI_type_node, long_long_unsigned_type_node,
13072 tree void_ftype_void
13073 = build_function_type (void_type_node, void_list_node);
13074 tree void_ftype_unsigned
13075 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
13076 tree void_ftype_unsigned_unsigned
13077 = build_function_type_list (void_type_node, unsigned_type_node,
13078 unsigned_type_node, NULL_TREE);
13079 tree void_ftype_pcvoid_unsigned_unsigned
13080 = build_function_type_list (void_type_node, const_ptr_type_node,
13081 unsigned_type_node, unsigned_type_node,
13083 tree unsigned_ftype_void
13084 = build_function_type (unsigned_type_node, void_list_node);
13086 = build_function_type (long_long_unsigned_type_node, void_list_node);
13087 tree v4sf_ftype_void
13088 = build_function_type (V4SF_type_node, void_list_node);
13089 tree v2si_ftype_v4sf
13090 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
13091 /* Loads/stores. */
13092 tree void_ftype_v8qi_v8qi_pchar
13093 = build_function_type_list (void_type_node,
13094 V8QI_type_node, V8QI_type_node,
13095 pchar_type_node, NULL_TREE);
13096 tree v4sf_ftype_pcfloat
13097 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
13098 /* @@@ the type is bogus */
13099 tree v4sf_ftype_v4sf_pv2si
13100 = build_function_type_list (V4SF_type_node,
13101 V4SF_type_node, pv2si_type_node, NULL_TREE);
13102 tree void_ftype_pv2si_v4sf
13103 = build_function_type_list (void_type_node,
13104 pv2si_type_node, V4SF_type_node, NULL_TREE);
13105 tree void_ftype_pfloat_v4sf
13106 = build_function_type_list (void_type_node,
13107 pfloat_type_node, V4SF_type_node, NULL_TREE);
13108 tree void_ftype_pdi_di
13109 = build_function_type_list (void_type_node,
13110 pdi_type_node, long_long_unsigned_type_node,
13112 tree void_ftype_pv2di_v2di
13113 = build_function_type_list (void_type_node,
13114 pv2di_type_node, V2DI_type_node, NULL_TREE);
13115 /* Normal vector unops. */
13116 tree v4sf_ftype_v4sf
13117 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
13119 /* Normal vector binops. */
13120 tree v4sf_ftype_v4sf_v4sf
13121 = build_function_type_list (V4SF_type_node,
13122 V4SF_type_node, V4SF_type_node, NULL_TREE);
13123 tree v8qi_ftype_v8qi_v8qi
13124 = build_function_type_list (V8QI_type_node,
13125 V8QI_type_node, V8QI_type_node, NULL_TREE);
13126 tree v4hi_ftype_v4hi_v4hi
13127 = build_function_type_list (V4HI_type_node,
13128 V4HI_type_node, V4HI_type_node, NULL_TREE);
13129 tree v2si_ftype_v2si_v2si
13130 = build_function_type_list (V2SI_type_node,
13131 V2SI_type_node, V2SI_type_node, NULL_TREE);
13132 tree di_ftype_di_di
13133 = build_function_type_list (long_long_unsigned_type_node,
13134 long_long_unsigned_type_node,
13135 long_long_unsigned_type_node, NULL_TREE);
13137 tree v2si_ftype_v2sf
13138 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
13139 tree v2sf_ftype_v2si
13140 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
13141 tree v2si_ftype_v2si
13142 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
13143 tree v2sf_ftype_v2sf
13144 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
13145 tree v2sf_ftype_v2sf_v2sf
13146 = build_function_type_list (V2SF_type_node,
13147 V2SF_type_node, V2SF_type_node, NULL_TREE);
13148 tree v2si_ftype_v2sf_v2sf
13149 = build_function_type_list (V2SI_type_node,
13150 V2SF_type_node, V2SF_type_node, NULL_TREE);
13151 tree pint_type_node = build_pointer_type (integer_type_node);
13152 tree pcint_type_node = build_pointer_type (
13153 build_type_variant (integer_type_node, 1, 0));
13154 tree pdouble_type_node = build_pointer_type (double_type_node);
13155 tree pcdouble_type_node = build_pointer_type (
13156 build_type_variant (double_type_node, 1, 0));
13157 tree int_ftype_v2df_v2df
13158 = build_function_type_list (integer_type_node,
13159 V2DF_type_node, V2DF_type_node, NULL_TREE);
13162 = build_function_type (intTI_type_node, void_list_node);
13163 tree v2di_ftype_void
13164 = build_function_type (V2DI_type_node, void_list_node);
13165 tree ti_ftype_ti_ti
13166 = build_function_type_list (intTI_type_node,
13167 intTI_type_node, intTI_type_node, NULL_TREE);
13168 tree void_ftype_pcvoid
13169 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
13171 = build_function_type_list (V2DI_type_node,
13172 long_long_unsigned_type_node, NULL_TREE);
13174 = build_function_type_list (long_long_unsigned_type_node,
13175 V2DI_type_node, NULL_TREE);
13176 tree v4sf_ftype_v4si
13177 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
13178 tree v4si_ftype_v4sf
13179 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
13180 tree v2df_ftype_v4si
13181 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
13182 tree v4si_ftype_v2df
13183 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
13184 tree v2si_ftype_v2df
13185 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
13186 tree v4sf_ftype_v2df
13187 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
13188 tree v2df_ftype_v2si
13189 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
13190 tree v2df_ftype_v4sf
13191 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
13192 tree int_ftype_v2df
13193 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
13194 tree int64_ftype_v2df
13195 = build_function_type_list (long_long_integer_type_node,
13196 V2DF_type_node, NULL_TREE);
13197 tree v2df_ftype_v2df_int
13198 = build_function_type_list (V2DF_type_node,
13199 V2DF_type_node, integer_type_node, NULL_TREE);
13200 tree v2df_ftype_v2df_int64
13201 = build_function_type_list (V2DF_type_node,
13202 V2DF_type_node, long_long_integer_type_node,
13204 tree v4sf_ftype_v4sf_v2df
13205 = build_function_type_list (V4SF_type_node,
13206 V4SF_type_node, V2DF_type_node, NULL_TREE);
13207 tree v2df_ftype_v2df_v4sf
13208 = build_function_type_list (V2DF_type_node,
13209 V2DF_type_node, V4SF_type_node, NULL_TREE);
13210 tree v2df_ftype_v2df_v2df_int
13211 = build_function_type_list (V2DF_type_node,
13212 V2DF_type_node, V2DF_type_node,
13215 tree v2df_ftype_v2df_pv2si
13216 = build_function_type_list (V2DF_type_node,
13217 V2DF_type_node, pv2si_type_node, NULL_TREE);
13218 tree void_ftype_pv2si_v2df
13219 = build_function_type_list (void_type_node,
13220 pv2si_type_node, V2DF_type_node, NULL_TREE);
13221 tree void_ftype_pdouble_v2df
13222 = build_function_type_list (void_type_node,
13223 pdouble_type_node, V2DF_type_node, NULL_TREE);
13224 tree void_ftype_pint_int
13225 = build_function_type_list (void_type_node,
13226 pint_type_node, integer_type_node, NULL_TREE);
13227 tree void_ftype_v16qi_v16qi_pchar
13228 = build_function_type_list (void_type_node,
13229 V16QI_type_node, V16QI_type_node,
13230 pchar_type_node, NULL_TREE);
13231 tree v2df_ftype_pcdouble
13232 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
13233 tree v2df_ftype_v2df_v2df
13234 = build_function_type_list (V2DF_type_node,
13235 V2DF_type_node, V2DF_type_node, NULL_TREE);
13236 tree v16qi_ftype_v16qi_v16qi
13237 = build_function_type_list (V16QI_type_node,
13238 V16QI_type_node, V16QI_type_node, NULL_TREE);
13239 tree v8hi_ftype_v8hi_v8hi
13240 = build_function_type_list (V8HI_type_node,
13241 V8HI_type_node, V8HI_type_node, NULL_TREE);
13242 tree v4si_ftype_v4si_v4si
13243 = build_function_type_list (V4SI_type_node,
13244 V4SI_type_node, V4SI_type_node, NULL_TREE);
13245 tree v2di_ftype_v2di_v2di
13246 = build_function_type_list (V2DI_type_node,
13247 V2DI_type_node, V2DI_type_node, NULL_TREE);
13248 tree v2di_ftype_v2df_v2df
13249 = build_function_type_list (V2DI_type_node,
13250 V2DF_type_node, V2DF_type_node, NULL_TREE);
13251 tree v2df_ftype_v2df
13252 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13253 tree v2df_ftype_double
13254 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
13255 tree v2df_ftype_double_double
13256 = build_function_type_list (V2DF_type_node,
13257 double_type_node, double_type_node, NULL_TREE);
13258 tree int_ftype_v8hi_int
13259 = build_function_type_list (integer_type_node,
13260 V8HI_type_node, integer_type_node, NULL_TREE);
13261 tree v8hi_ftype_v8hi_int_int
13262 = build_function_type_list (V8HI_type_node,
13263 V8HI_type_node, integer_type_node,
13264 integer_type_node, NULL_TREE);
13265 tree v2di_ftype_v2di_int
13266 = build_function_type_list (V2DI_type_node,
13267 V2DI_type_node, integer_type_node, NULL_TREE);
13268 tree v4si_ftype_v4si_int
13269 = build_function_type_list (V4SI_type_node,
13270 V4SI_type_node, integer_type_node, NULL_TREE);
13271 tree v8hi_ftype_v8hi_int
13272 = build_function_type_list (V8HI_type_node,
13273 V8HI_type_node, integer_type_node, NULL_TREE);
13274 tree v8hi_ftype_v8hi_v2di
13275 = build_function_type_list (V8HI_type_node,
13276 V8HI_type_node, V2DI_type_node, NULL_TREE);
13277 tree v4si_ftype_v4si_v2di
13278 = build_function_type_list (V4SI_type_node,
13279 V4SI_type_node, V2DI_type_node, NULL_TREE);
13280 tree v4si_ftype_v8hi_v8hi
13281 = build_function_type_list (V4SI_type_node,
13282 V8HI_type_node, V8HI_type_node, NULL_TREE);
13283 tree di_ftype_v8qi_v8qi
13284 = build_function_type_list (long_long_unsigned_type_node,
13285 V8QI_type_node, V8QI_type_node, NULL_TREE);
13286 tree v2di_ftype_v16qi_v16qi
13287 = build_function_type_list (V2DI_type_node,
13288 V16QI_type_node, V16QI_type_node, NULL_TREE);
13289 tree int_ftype_v16qi
13290 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
13291 tree v16qi_ftype_pcchar
13292 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
13293 tree void_ftype_pchar_v16qi
13294 = build_function_type_list (void_type_node,
13295 pchar_type_node, V16QI_type_node, NULL_TREE);
13296 tree v4si_ftype_pcint
13297 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
13298 tree void_ftype_pcint_v4si
13299 = build_function_type_list (void_type_node,
13300 pcint_type_node, V4SI_type_node, NULL_TREE);
13301 tree v2di_ftype_v2di
13302 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
13304 /* Add all builtins that are more or less simple operations on two
13306 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13308 /* Use one of the operands; the target can have a different mode for
13309 mask-generating compares. */
13310 enum machine_mode mode;
13315 mode = insn_data[d->icode].operand[1].mode;
13320 type = v16qi_ftype_v16qi_v16qi;
13323 type = v8hi_ftype_v8hi_v8hi;
13326 type = v4si_ftype_v4si_v4si;
13329 type = v2di_ftype_v2di_v2di;
13332 type = v2df_ftype_v2df_v2df;
13335 type = ti_ftype_ti_ti;
13338 type = v4sf_ftype_v4sf_v4sf;
13341 type = v8qi_ftype_v8qi_v8qi;
13344 type = v4hi_ftype_v4hi_v4hi;
13347 type = v2si_ftype_v2si_v2si;
13350 type = di_ftype_di_di;
13357 /* Override for comparisons. */
13358 if (d->icode == CODE_FOR_maskcmpv4sf3
13359 || d->icode == CODE_FOR_maskncmpv4sf3
13360 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13361 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
13362 type = v4si_ftype_v4sf_v4sf;
13364 if (d->icode == CODE_FOR_maskcmpv2df3
13365 || d->icode == CODE_FOR_maskncmpv2df3
13366 || d->icode == CODE_FOR_vmmaskcmpv2df3
13367 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13368 type = v2di_ftype_v2df_v2df;
13370 def_builtin (d->mask, d->name, type, d->code);
13373 /* Add the remaining MMX insns with somewhat more complicated types. */
13374 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13375 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13376 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13377 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13378 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13380 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13381 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13382 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13384 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13385 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13387 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13388 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
13390 /* comi/ucomi insns. */
13391 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13392 if (d->mask == MASK_SSE2)
13393 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13395 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
13397 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13398 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13399 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13401 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13402 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13403 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13404 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13405 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13406 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
13407 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13408 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
13409 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13410 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13411 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
13413 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13414 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13416 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13418 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13419 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13420 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13421 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13422 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13423 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13425 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13426 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13427 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13428 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13430 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13431 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13432 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13433 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13435 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13437 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13439 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13440 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13441 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13442 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13443 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13444 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13446 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13448 /* Original 3DNow! */
13449 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13450 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13451 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13452 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13453 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13454 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13455 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13456 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13457 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13458 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13459 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13460 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13461 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13462 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13463 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13464 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13465 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13466 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13467 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13468 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13470 /* 3DNow! extension as used in the Athlon CPU. */
13471 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13472 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13473 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13474 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13475 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13476 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13478 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13481 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13482 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13484 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13485 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13486 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13488 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13489 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13490 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
13491 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13492 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13493 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13495 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13496 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13497 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13498 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13500 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13501 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13502 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13503 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13504 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13506 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13507 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13508 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13509 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13511 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13512 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13514 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13516 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13517 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13519 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13520 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13521 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13522 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13523 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13525 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13527 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13528 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13529 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13530 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
13532 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13533 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13534 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13536 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13537 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
13538 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13539 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13541 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13542 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13543 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13544 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13545 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13546 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13547 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13549 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13550 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13551 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13553 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13554 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13555 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13556 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13557 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13558 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13559 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13561 def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13563 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13564 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13565 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13567 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13568 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13569 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13571 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13572 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13574 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13575 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13576 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13577 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13579 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13580 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13581 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13582 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13584 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13585 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13587 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13589 /* Prescott New Instructions. */
13590 def_builtin (MASK_PNI, "__builtin_ia32_monitor",
13591 void_ftype_pcvoid_unsigned_unsigned,
13592 IX86_BUILTIN_MONITOR);
13593 def_builtin (MASK_PNI, "__builtin_ia32_mwait",
13594 void_ftype_unsigned_unsigned,
13595 IX86_BUILTIN_MWAIT);
13596 def_builtin (MASK_PNI, "__builtin_ia32_movshdup",
13598 IX86_BUILTIN_MOVSHDUP);
13599 def_builtin (MASK_PNI, "__builtin_ia32_movsldup",
13601 IX86_BUILTIN_MOVSLDUP);
13602 def_builtin (MASK_PNI, "__builtin_ia32_lddqu",
13603 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
13604 def_builtin (MASK_PNI, "__builtin_ia32_loadddup",
13605 v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
13606 def_builtin (MASK_PNI, "__builtin_ia32_movddup",
13607 v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
13610 /* Errors in the source file can cause expand_expr to return const0_rtx
13611 where we expect a vector. To avoid crashing, use one of the vector
13612 clear instructions. */
13614 safe_vector_operand (rtx x, enum machine_mode mode)
/* If X is not the const0_rtx error marker it is usable as-is; the
   early-return path is elided from this excerpt.  Otherwise build a
   fresh register of MODE and emit an instruction that zeroes it.  */
13616 if (x != const0_rtx)
13618 x = gen_reg_rtx (mode);
/* MMX / 3DNow! modes live in the 64-bit MMX registers: clear with
   mmx_clrdi, viewing X through a DImode subreg when MODE is not
   already DImode.  */
13620 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13621 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13622 : gen_rtx_SUBREG (DImode, x, 0)));
/* Otherwise clear via the SSE V4SF clear pattern, again using a
   subreg when MODE is not V4SFmode.  (The `else` keyword and the
   final `return x;` fall on lines elided from this excerpt.)  */
13624 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13625 : gen_rtx_SUBREG (V4SFmode, x, 0),
13626 CONST0_RTX (V4SFmode)));
13630 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* ICODE is the insn to generate, ARGLIST holds the two tree arguments,
   and TARGET (possibly NULL/wrong mode) is the suggested result rtx.
   Returns the rtx holding the result (several intermediate lines of
   this function are elided in this excerpt).  */
13633 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
/* Expand both arguments and look up the modes the chosen insn wants
   for its result (tmode) and its two inputs (mode0/mode1).  */
13636 tree arg0 = TREE_VALUE (arglist);
13637 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13638 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13639 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13640 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13641 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13642 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Guard against const0_rtx placeholders produced by source errors.  */
13644 if (VECTOR_MODE_P (mode0))
13645 op0 = safe_vector_operand (op0, mode0);
13646 if (VECTOR_MODE_P (mode1))
13647 op1 = safe_vector_operand (op1, mode1);
/* Allocate a fresh result register when TARGET is absent, has the
   wrong mode, or fails the insn's operand-0 predicate.  */
13650 || GET_MODE (target) != tmode
13651 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13652 target = gen_reg_rtx (tmode);
/* An SImode scalar feeding a TImode operand (e.g. shift counts for
   the SSE2 128-bit shifts): load it through V4SImode with sse2_loadd
   and then reinterpret the low part as TImode.  */
13654 if (GET_MODE (op1) == SImode && mode1 == TImode)
13656 rtx x = gen_reg_rtx (V4SImode);
13657 emit_insn (gen_sse2_loadd (x, op1));
13658 op1 = gen_lowpart (TImode, x);
13661 /* In case the insn wants input operands in modes different from
13662 the result, abort. */
13663 if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
13664 || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
/* Force each operand into a register when it does not already satisfy
   the insn's predicate.  */
13667 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13668 op0 = copy_to_mode_reg (mode0, op0);
13669 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13670 op1 = copy_to_mode_reg (mode1, op1);
13672 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13673 yet one of the two must not be a memory. This is normally enforced
13674 by expanders, but we didn't bother to create one here. */
13675 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13676 op0 = copy_to_mode_reg (mode0, op0);
/* Emit the insn; the pat-NULL check and `return target;` fall on
   lines elided from this excerpt.  */
13678 pat = GEN_FCN (icode) (target, op0, op1);
13685 /* Subroutine of ix86_expand_builtin to take care of stores. */
/* ICODE is the store insn; ARGLIST is (address, value).  Operand 0 is
   the destination memory, operand 1 the value stored.  The trailing
   pat-NULL check and return are elided from this excerpt.  */
13688 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
13691 tree arg0 = TREE_VALUE (arglist);
13692 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13693 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13694 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13695 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13696 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
/* Replace a const0_rtx error placeholder with a cleared vector.  */
13698 if (VECTOR_MODE_P (mode1))
13699 op1 = safe_vector_operand (op1, mode1);
/* Turn the expanded address into a MEM in the destination mode and
   force the stored value into a register of the insn's input mode.  */
13701 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13702 op1 = copy_to_mode_reg (mode1, op1);
13704 pat = GEN_FCN (icode) (op0, op1);
13710 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
/* ICODE is the one-operand insn; ARGLIST holds the single argument;
   TARGET is the suggested result rtx.  When DO_LOAD is nonzero the
   argument is an address and the operand is read from memory instead
   of being taken directly (the if/else around the MEM construction is
   elided in this excerpt).  */
13713 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
13714 rtx target, int do_load)
13717 tree arg0 = TREE_VALUE (arglist);
13718 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13719 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13720 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
/* Allocate a fresh result register unless TARGET already fits.  */
13723 || GET_MODE (target) != tmode
13724 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13725 target = gen_reg_rtx (tmode);
/* do_load path: dereference the pointer argument.  */
13727 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
/* Non-load path: sanitize a possible const0_rtx placeholder and make
   sure the operand satisfies the insn predicate.  */
13730 if (VECTOR_MODE_P (mode0))
13731 op0 = safe_vector_operand (op0, mode0);
13733 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13734 op0 = copy_to_mode_reg (mode0, op0);
/* Emit; the pat-NULL check and `return target;` are elided here.  */
13737 pat = GEN_FCN (icode) (target, op0);
13744 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13745 sqrtss, rsqrtss, rcpss. */
/* These scalar SSE insns are two-operand patterns (the untouched high
   elements come from the second input), so the single source is fed to
   both input operands.  */
13748 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
13751 tree arg0 = TREE_VALUE (arglist);
13752 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13753 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13754 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
/* Allocate a fresh result register unless TARGET already fits.  */
13757 || GET_MODE (target) != tmode
13758 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13759 target = gen_reg_rtx (tmode);
13761 if (VECTOR_MODE_P (mode0))
13762 op0 = safe_vector_operand (op0, mode0);
13764 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13765 op0 = copy_to_mode_reg (mode0, op0);
/* op1 is initialized from op0 on a line elided from this excerpt;
   here it is validated against the third operand's predicate.  */
13768 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13769 op1 = copy_to_mode_reg (mode0, op1);
/* Emit; the pat-NULL check and `return target;` are elided here.  */
13771 pat = GEN_FCN (icode) (target, op0, op1);
13778 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* D describes the builtin (icode and rtx comparison code); ARGLIST
   holds the two vector operands.  The remaining parameter of this
   function falls on a line elided from this excerpt.  Returns the rtx
   holding the mask result.  */
13781 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
13785 tree arg0 = TREE_VALUE (arglist);
13786 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13787 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13788 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13790 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13791 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13792 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13793 enum rtx_code comparison = d->comparison;
/* Guard against const0_rtx error placeholders.  */
13795 if (VECTOR_MODE_P (mode0))
13796 op0 = safe_vector_operand (op0, mode0);
13797 if (VECTOR_MODE_P (mode1))
13798 op1 = safe_vector_operand (op1, mode1);
13800 /* Swap operands if we have a comparison that isn't available in
13804 rtx tmp = gen_reg_rtx (mode1);
13805 emit_move_insn (tmp, op1);
/* (The actual operand exchange lines are elided in this excerpt.)
   Allocate a fresh result register unless TARGET already fits.  */
13811 || GET_MODE (target) != tmode
13812 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13813 target = gen_reg_rtx (tmode);
13815 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13816 op0 = copy_to_mode_reg (mode0, op0);
13817 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13818 op1 = copy_to_mode_reg (mode1, op1);
/* Build the comparison rtx as the insn's third input and emit; the
   pat-NULL check and `return target;` are elided here.  */
13820 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13821 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13828 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* comi/ucomi compare two scalar operands and set EFLAGS; the builtin
   result is materialized here as a 0/1 integer read from the flags.
   Further parameters of this function fall on elided lines.  */
13831 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
13835 tree arg0 = TREE_VALUE (arglist);
13836 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13837 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13838 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13840 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13841 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13842 enum rtx_code comparison = d->comparison;
/* Guard against const0_rtx error placeholders.  */
13844 if (VECTOR_MODE_P (mode0))
13845 op0 = safe_vector_operand (op0, mode0);
13846 if (VECTOR_MODE_P (mode1))
13847 op1 = safe_vector_operand (op1, mode1);
13849 /* Swap operands if we have a comparison that isn't available in
/* (Swap body elided.)  Zero an SImode result register first, then
   address its low byte via a QImode subreg so the flag can be stored
   with a strict_low_part set below.  */
13858 target = gen_reg_rtx (SImode);
13859 emit_move_insn (target, const0_rtx);
13860 target = gen_rtx_SUBREG (QImode, target, 0);
13862 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13863 op0 = copy_to_mode_reg (mode0, op0);
13864 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13865 op1 = copy_to_mode_reg (mode1, op1);
/* Emit the flag-setting comparison insn, then set the low byte of the
   result from the flags (the flags-register operand of the comparison
   falls on an elided line).  */
13867 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13868 pat = GEN_FCN (d->icode) (op0, op1);
13872 emit_insn (gen_rtx_SET (VOIDmode,
13873 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13874 gen_rtx_fmt_ee (comparison, QImode,
/* Return the full SImode register underlying the QImode subreg.  */
13878 return SUBREG_REG (target);
13881 /* Expand an expression EXP that calls a built-in function,
13882 with result going to TARGET if that's convenient
13883 (and in mode MODE if that's convenient).
13884 SUBTARGET may be used as the target for computing one of EXP's operands.
13885 IGNORE is nonzero if the value is to be ignored. */
13888 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
13889 enum machine_mode mode ATTRIBUTE_UNUSED,
13890 int ignore ATTRIBUTE_UNUSED)
13892 const struct builtin_description *d;
13894 enum insn_code icode;
13895 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13896 tree arglist = TREE_OPERAND (exp, 1);
13897 tree arg0, arg1, arg2;
13898 rtx op0, op1, op2, pat;
13899 enum machine_mode tmode, mode0, mode1, mode2;
13900 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13904 case IX86_BUILTIN_EMMS:
13905 emit_insn (gen_emms ());
13908 case IX86_BUILTIN_SFENCE:
13909 emit_insn (gen_sfence ());
13912 case IX86_BUILTIN_PEXTRW:
13913 case IX86_BUILTIN_PEXTRW128:
13914 icode = (fcode == IX86_BUILTIN_PEXTRW
13915 ? CODE_FOR_mmx_pextrw
13916 : CODE_FOR_sse2_pextrw);
13917 arg0 = TREE_VALUE (arglist);
13918 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13919 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13920 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13921 tmode = insn_data[icode].operand[0].mode;
13922 mode0 = insn_data[icode].operand[1].mode;
13923 mode1 = insn_data[icode].operand[2].mode;
13925 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13926 op0 = copy_to_mode_reg (mode0, op0);
13927 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13929 error ("selector must be an integer constant in the range 0..%i",
13930 fcode == IX86_BUILTIN_PEXTRW ? 3:7);
13931 return gen_reg_rtx (tmode);
13934 || GET_MODE (target) != tmode
13935 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13936 target = gen_reg_rtx (tmode);
13937 pat = GEN_FCN (icode) (target, op0, op1);
13943 case IX86_BUILTIN_PINSRW:
13944 case IX86_BUILTIN_PINSRW128:
13945 icode = (fcode == IX86_BUILTIN_PINSRW
13946 ? CODE_FOR_mmx_pinsrw
13947 : CODE_FOR_sse2_pinsrw);
13948 arg0 = TREE_VALUE (arglist);
13949 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13950 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13951 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13952 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13953 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13954 tmode = insn_data[icode].operand[0].mode;
13955 mode0 = insn_data[icode].operand[1].mode;
13956 mode1 = insn_data[icode].operand[2].mode;
13957 mode2 = insn_data[icode].operand[3].mode;
13959 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13960 op0 = copy_to_mode_reg (mode0, op0);
13961 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13962 op1 = copy_to_mode_reg (mode1, op1);
13963 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13965 error ("selector must be an integer constant in the range 0..%i",
13966 fcode == IX86_BUILTIN_PINSRW ? 15:255);
13970 || GET_MODE (target) != tmode
13971 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13972 target = gen_reg_rtx (tmode);
13973 pat = GEN_FCN (icode) (target, op0, op1, op2);
13979 case IX86_BUILTIN_MASKMOVQ:
13980 case IX86_BUILTIN_MASKMOVDQU:
13981 icode = (fcode == IX86_BUILTIN_MASKMOVQ
13982 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
13983 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
13984 : CODE_FOR_sse2_maskmovdqu));
13985 /* Note the arg order is different from the operand order. */
13986 arg1 = TREE_VALUE (arglist);
13987 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
13988 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13989 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13990 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13991 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13992 mode0 = insn_data[icode].operand[0].mode;
13993 mode1 = insn_data[icode].operand[1].mode;
13994 mode2 = insn_data[icode].operand[2].mode;
13996 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13997 op0 = copy_to_mode_reg (mode0, op0);
13998 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13999 op1 = copy_to_mode_reg (mode1, op1);
14000 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
14001 op2 = copy_to_mode_reg (mode2, op2);
14002 pat = GEN_FCN (icode) (op0, op1, op2);
14008 case IX86_BUILTIN_SQRTSS:
14009 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
14010 case IX86_BUILTIN_RSQRTSS:
14011 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
14012 case IX86_BUILTIN_RCPSS:
14013 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
14015 case IX86_BUILTIN_LOADAPS:
14016 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
14018 case IX86_BUILTIN_LOADUPS:
14019 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
14021 case IX86_BUILTIN_STOREAPS:
14022 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
14024 case IX86_BUILTIN_STOREUPS:
14025 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
14027 case IX86_BUILTIN_LOADSS:
14028 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
14030 case IX86_BUILTIN_STORESS:
14031 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
14033 case IX86_BUILTIN_LOADHPS:
14034 case IX86_BUILTIN_LOADLPS:
14035 case IX86_BUILTIN_LOADHPD:
14036 case IX86_BUILTIN_LOADLPD:
14037 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
14038 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
14039 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
14040 : CODE_FOR_sse2_movlpd);
14041 arg0 = TREE_VALUE (arglist);
14042 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14043 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14044 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14045 tmode = insn_data[icode].operand[0].mode;
14046 mode0 = insn_data[icode].operand[1].mode;
14047 mode1 = insn_data[icode].operand[2].mode;
14049 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14050 op0 = copy_to_mode_reg (mode0, op0);
14051 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
14053 || GET_MODE (target) != tmode
14054 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14055 target = gen_reg_rtx (tmode);
14056 pat = GEN_FCN (icode) (target, op0, op1);
14062 case IX86_BUILTIN_STOREHPS:
14063 case IX86_BUILTIN_STORELPS:
14064 case IX86_BUILTIN_STOREHPD:
14065 case IX86_BUILTIN_STORELPD:
14066 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
14067 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
14068 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
14069 : CODE_FOR_sse2_movlpd);
14070 arg0 = TREE_VALUE (arglist);
14071 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14072 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14073 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14074 mode0 = insn_data[icode].operand[1].mode;
14075 mode1 = insn_data[icode].operand[2].mode;
14077 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14078 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14079 op1 = copy_to_mode_reg (mode1, op1);
14081 pat = GEN_FCN (icode) (op0, op0, op1);
14087 case IX86_BUILTIN_MOVNTPS:
14088 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
14089 case IX86_BUILTIN_MOVNTQ:
14090 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
14092 case IX86_BUILTIN_LDMXCSR:
14093 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14094 target = assign_386_stack_local (SImode, 0);
14095 emit_move_insn (target, op0);
14096 emit_insn (gen_ldmxcsr (target));
14099 case IX86_BUILTIN_STMXCSR:
14100 target = assign_386_stack_local (SImode, 0);
14101 emit_insn (gen_stmxcsr (target));
14102 return copy_to_mode_reg (SImode, target);
14104 case IX86_BUILTIN_SHUFPS:
14105 case IX86_BUILTIN_SHUFPD:
14106 icode = (fcode == IX86_BUILTIN_SHUFPS
14107 ? CODE_FOR_sse_shufps
14108 : CODE_FOR_sse2_shufpd);
14109 arg0 = TREE_VALUE (arglist);
14110 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14111 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14112 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14113 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14114 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14115 tmode = insn_data[icode].operand[0].mode;
14116 mode0 = insn_data[icode].operand[1].mode;
14117 mode1 = insn_data[icode].operand[2].mode;
14118 mode2 = insn_data[icode].operand[3].mode;
14120 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14121 op0 = copy_to_mode_reg (mode0, op0);
14122 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14123 op1 = copy_to_mode_reg (mode1, op1);
14124 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14126 /* @@@ better error message */
14127 error ("mask must be an immediate");
14128 return gen_reg_rtx (tmode);
14131 || GET_MODE (target) != tmode
14132 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14133 target = gen_reg_rtx (tmode);
14134 pat = GEN_FCN (icode) (target, op0, op1, op2);
14140 case IX86_BUILTIN_PSHUFW:
14141 case IX86_BUILTIN_PSHUFD:
14142 case IX86_BUILTIN_PSHUFHW:
14143 case IX86_BUILTIN_PSHUFLW:
14144 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14145 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14146 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14147 : CODE_FOR_mmx_pshufw);
14148 arg0 = TREE_VALUE (arglist);
14149 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14150 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14151 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14152 tmode = insn_data[icode].operand[0].mode;
14153 mode1 = insn_data[icode].operand[1].mode;
14154 mode2 = insn_data[icode].operand[2].mode;
14156 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14157 op0 = copy_to_mode_reg (mode1, op0);
14158 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14160 /* @@@ better error message */
14161 error ("mask must be an immediate");
14165 || GET_MODE (target) != tmode
14166 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14167 target = gen_reg_rtx (tmode);
14168 pat = GEN_FCN (icode) (target, op0, op1);
14174 case IX86_BUILTIN_PSLLDQI128:
14175 case IX86_BUILTIN_PSRLDQI128:
14176 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14177 : CODE_FOR_sse2_lshrti3);
14178 arg0 = TREE_VALUE (arglist);
14179 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14180 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14181 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14182 tmode = insn_data[icode].operand[0].mode;
14183 mode1 = insn_data[icode].operand[1].mode;
14184 mode2 = insn_data[icode].operand[2].mode;
14186 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14188 op0 = copy_to_reg (op0);
14189 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14191 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14193 error ("shift must be an immediate");
14196 target = gen_reg_rtx (V2DImode);
14197 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14203 case IX86_BUILTIN_FEMMS:
14204 emit_insn (gen_femms ());
14207 case IX86_BUILTIN_PAVGUSB:
14208 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
14210 case IX86_BUILTIN_PF2ID:
14211 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
14213 case IX86_BUILTIN_PFACC:
14214 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
14216 case IX86_BUILTIN_PFADD:
14217 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
14219 case IX86_BUILTIN_PFCMPEQ:
14220 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
14222 case IX86_BUILTIN_PFCMPGE:
14223 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
14225 case IX86_BUILTIN_PFCMPGT:
14226 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
14228 case IX86_BUILTIN_PFMAX:
14229 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
14231 case IX86_BUILTIN_PFMIN:
14232 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
14234 case IX86_BUILTIN_PFMUL:
14235 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
14237 case IX86_BUILTIN_PFRCP:
14238 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
14240 case IX86_BUILTIN_PFRCPIT1:
14241 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
14243 case IX86_BUILTIN_PFRCPIT2:
14244 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
14246 case IX86_BUILTIN_PFRSQIT1:
14247 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
14249 case IX86_BUILTIN_PFRSQRT:
14250 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
14252 case IX86_BUILTIN_PFSUB:
14253 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
14255 case IX86_BUILTIN_PFSUBR:
14256 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
14258 case IX86_BUILTIN_PI2FD:
14259 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
14261 case IX86_BUILTIN_PMULHRW:
14262 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
14264 case IX86_BUILTIN_PF2IW:
14265 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
14267 case IX86_BUILTIN_PFNACC:
14268 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
14270 case IX86_BUILTIN_PFPNACC:
14271 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
14273 case IX86_BUILTIN_PI2FW:
14274 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
14276 case IX86_BUILTIN_PSWAPDSI:
14277 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
14279 case IX86_BUILTIN_PSWAPDSF:
14280 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
14282 case IX86_BUILTIN_SSE_ZERO:
14283 target = gen_reg_rtx (V4SFmode);
14284 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
14287 case IX86_BUILTIN_MMX_ZERO:
14288 target = gen_reg_rtx (DImode);
14289 emit_insn (gen_mmx_clrdi (target));
14292 case IX86_BUILTIN_CLRTI:
14293 target = gen_reg_rtx (V2DImode);
14294 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
14298 case IX86_BUILTIN_SQRTSD:
14299 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14300 case IX86_BUILTIN_LOADAPD:
14301 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14302 case IX86_BUILTIN_LOADUPD:
14303 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14305 case IX86_BUILTIN_STOREAPD:
14306 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14307 case IX86_BUILTIN_STOREUPD:
14308 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14310 case IX86_BUILTIN_LOADSD:
14311 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14313 case IX86_BUILTIN_STORESD:
14314 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
14316 case IX86_BUILTIN_SETPD1:
14317 target = assign_386_stack_local (DFmode, 0);
14318 arg0 = TREE_VALUE (arglist);
14319 emit_move_insn (adjust_address (target, DFmode, 0),
14320 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14321 op0 = gen_reg_rtx (V2DFmode);
14322 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
14323 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
14326 case IX86_BUILTIN_SETPD:
14327 target = assign_386_stack_local (V2DFmode, 0);
14328 arg0 = TREE_VALUE (arglist);
14329 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14330 emit_move_insn (adjust_address (target, DFmode, 0),
14331 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14332 emit_move_insn (adjust_address (target, DFmode, 8),
14333 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14334 op0 = gen_reg_rtx (V2DFmode);
14335 emit_insn (gen_sse2_movapd (op0, target));
14338 case IX86_BUILTIN_LOADRPD:
14339 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14340 gen_reg_rtx (V2DFmode), 1);
14341 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
14344 case IX86_BUILTIN_LOADPD1:
14345 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14346 gen_reg_rtx (V2DFmode), 1);
14347 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14350 case IX86_BUILTIN_STOREPD1:
14351 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14352 case IX86_BUILTIN_STORERPD:
14353 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14355 case IX86_BUILTIN_CLRPD:
14356 target = gen_reg_rtx (V2DFmode);
14357 emit_insn (gen_sse_clrv2df (target));
14360 case IX86_BUILTIN_MFENCE:
14361 emit_insn (gen_sse2_mfence ());
14363 case IX86_BUILTIN_LFENCE:
14364 emit_insn (gen_sse2_lfence ());
14367 case IX86_BUILTIN_CLFLUSH:
14368 arg0 = TREE_VALUE (arglist);
14369 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14370 icode = CODE_FOR_sse2_clflush;
14371 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14372 op0 = copy_to_mode_reg (Pmode, op0);
14374 emit_insn (gen_sse2_clflush (op0));
14377 case IX86_BUILTIN_MOVNTPD:
14378 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14379 case IX86_BUILTIN_MOVNTDQ:
14380 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14381 case IX86_BUILTIN_MOVNTI:
14382 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14384 case IX86_BUILTIN_LOADDQA:
14385 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14386 case IX86_BUILTIN_LOADDQU:
14387 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14388 case IX86_BUILTIN_LOADD:
14389 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14391 case IX86_BUILTIN_STOREDQA:
14392 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14393 case IX86_BUILTIN_STOREDQU:
14394 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14395 case IX86_BUILTIN_STORED:
14396 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
14398 case IX86_BUILTIN_MONITOR:
14399 arg0 = TREE_VALUE (arglist);
14400 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14401 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14402 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14403 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14404 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14406 op0 = copy_to_mode_reg (SImode, op0);
14408 op1 = copy_to_mode_reg (SImode, op1);
14410 op2 = copy_to_mode_reg (SImode, op2);
14411 emit_insn (gen_monitor (op0, op1, op2));
14414 case IX86_BUILTIN_MWAIT:
14415 arg0 = TREE_VALUE (arglist);
14416 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14417 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14418 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14420 op0 = copy_to_mode_reg (SImode, op0);
14422 op1 = copy_to_mode_reg (SImode, op1);
14423 emit_insn (gen_mwait (op0, op1));
14426 case IX86_BUILTIN_LOADDDUP:
14427 return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
14429 case IX86_BUILTIN_LDDQU:
14430 return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
14437 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14438 if (d->code == fcode)
14440 /* Compares are treated specially. */
14441 if (d->icode == CODE_FOR_maskcmpv4sf3
14442 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14443 || d->icode == CODE_FOR_maskncmpv4sf3
14444 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14445 || d->icode == CODE_FOR_maskcmpv2df3
14446 || d->icode == CODE_FOR_vmmaskcmpv2df3
14447 || d->icode == CODE_FOR_maskncmpv2df3
14448 || d->icode == CODE_FOR_vmmaskncmpv2df3)
14449 return ix86_expand_sse_compare (d, arglist, target);
14451 return ix86_expand_binop_builtin (d->icode, arglist, target);
14454 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14455 if (d->code == fcode)
14456 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14458 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14459 if (d->code == fcode)
14460 return ix86_expand_sse_comi (d, arglist, target);
14462 /* @@@ Should really do something sensible here. */
14466 /* Store OPERAND to the memory after reload is completed. This means
14467 that we can't easily use assign_stack_local. */
/* NOTE(review): this extract is missing interior source lines (the
   embedded line numbers jump), so only the visible statements are
   documented.  Spills OPERAND (of mode MODE) onto the stack and, per
   the visible assignments, yields a MEM rtx naming the stored value.
   Visible strategies:
     - TARGET_RED_ZONE: plain store into the red zone below the stack
       pointer (no stack-pointer adjustment);
     - 64-bit without red zone: push via PRE_DEC of the stack pointer
       in DImode;
     - otherwise (32-bit): push SImode words; DImode is split into two
       pushes with split_di, HImode is widened to SImode unless
       TARGET_PARTIAL_REG_STALL.  */
14469 ix86_force_to_memory (enum machine_mode mode, rtx operand)
14472 if (!reload_completed)
14474 if (TARGET_RED_ZONE)
/* Red zone: address sp + (-RED_ZONE_SIZE); simple move, no push.  */
14476 result = gen_rtx_MEM (mode,
14477 gen_rtx_PLUS (Pmode,
14479 GEN_INT (-RED_ZONE_SIZE)));
14480 emit_move_insn (result, operand);
14482 else if (!TARGET_RED_ZONE && TARGET_64BIT)
/* 64-bit push: widen to DImode, store through pre-decremented rsp.  */
14488 operand = gen_lowpart (DImode, operand);
14492 gen_rtx_SET (VOIDmode,
14493 gen_rtx_MEM (DImode,
14494 gen_rtx_PRE_DEC (DImode,
14495 stack_pointer_rtx)),
/* The pushed value is now at the (decremented) stack pointer.  */
14501 result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* 32-bit DImode: split into two SImode halves and push each.  */
14510 split_di (&operand, 1, operands, operands + 1);
14512 gen_rtx_SET (VOIDmode,
14513 gen_rtx_MEM (SImode,
14514 gen_rtx_PRE_DEC (Pmode,
14515 stack_pointer_rtx)),
14518 gen_rtx_SET (VOIDmode,
14519 gen_rtx_MEM (SImode,
14520 gen_rtx_PRE_DEC (Pmode,
14521 stack_pointer_rtx)),
14526 /* It is better to store HImodes as SImodes. */
14527 if (!TARGET_PARTIAL_REG_STALL)
14528 operand = gen_lowpart (SImode, operand);
14532 gen_rtx_SET (VOIDmode,
14533 gen_rtx_MEM (GET_MODE (operand),
14534 gen_rtx_PRE_DEC (SImode,
14535 stack_pointer_rtx)),
14541 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14546 /* Free operand from the memory. */
/* Undo a previous ix86_force_to_memory spill: with no red zone, the
   stack pointer must be bumped back past the pushed slot.  The visible
   conditions pick the slot size from MODE/target (missing lines
   presumably compute the byte count).  With TARGET_RED_ZONE nothing
   was pushed, so there is nothing to deallocate.  */
14548 ix86_free_from_memory (enum machine_mode mode)
14550 if (!TARGET_RED_ZONE)
14554 if (mode == DImode || TARGET_64BIT)
14556 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14560 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14561 to pop or add instruction if registers are available. */
14562 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14563 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14568 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14569 QImode must go into class Q_REGS.
14570 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
14571 movdf to do mem-to-mem moves through integer regs. */
/* Implements PREFERRED_RELOAD_CLASS: given value X about to be
   reloaded into CLASS, return the (possibly narrower) class to use.
   NOTE(review): interior lines are missing from this extract, so the
   fall-through return values between the visible conditions cannot be
   confirmed here.  */
14573 ix86_preferred_reload_class (rtx x, enum reg_class class)
/* Non-zero vector constants cannot be materialized in registers.  */
14575 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14577 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14579 /* SSE can't load any constant directly yet. */
14580 if (SSE_CLASS_P (class))
14582 /* Floats can load 0 and 1. */
14583 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14585 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14586 if (MAYBE_SSE_CLASS_P (class))
14587 return (reg_class_subset_p (class, GENERAL_REGS)
14588 ? GENERAL_REGS : FLOAT_REGS);
14592 /* General regs can load everything. */
14593 if (reg_class_subset_p (class, GENERAL_REGS))
14594 return GENERAL_REGS;
14595 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14596 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
/* MMX registers cannot hold any constant directly.  */
14599 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
/* QImode values must end up in byte-addressable registers (Q_REGS).  */
14601 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14606 /* If we are copying between general and FP registers, we need a memory
14607 location. The same is true for SSE and MMX registers.
14609 The macro can't work reliably when one of the CLASSES is class containing
14610 registers from multiple units (SSE, MMX, integer). We avoid this by never
14611 combining those units in single alternative in the machine description.
14612 Ensure that this constraint holds to avoid unexpected surprises.
14614 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14615 enforce these sanity checks. */
/* Returns nonzero when a CLASS1 -> CLASS2 move of MODE must bounce
   through memory.  The first (visible) condition is the sanity check
   described above: each class must belong to exactly one unit.  The
   action taken when it fires is on missing lines (presumably guarded
   by STRICT -- TODO confirm against full source).  */
14617 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
14618 enum machine_mode mode, int strict)
14620 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14621 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14622 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14623 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14624 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14625 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
/* Memory is needed for x87<->other moves always, and for SSE/MMX unit
   crossings except direct SImode (or 64-bit DImode) inter-unit moves
   when those are enabled.  */
14632 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14633 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14634 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14635 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14636 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
14638 /* Return the cost of moving data from a register in class CLASS1 to
14639 one in class CLASS2.
14641 It is not required that the cost always equal 2 when FROM is the same as TO;
14642 on some machines it is expensive to move between registers if they are not
14643 general registers. */
/* NOTE(review): several interior lines (declaration of `cost', the
   early returns of the memory path, the MMX/FP-overlap cost) are
   missing from this extract.  */
14645 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
14646 enum reg_class class2)
14648 /* In case we require secondary memory, compute cost of the store followed
14649 by load. In order to avoid bad register allocation choices, we need
14650 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14652 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
/* Charge the worse of store/load direction for each class.  */
14656 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14657 MEMORY_MOVE_COST (mode, class1, 1));
14658 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14659 MEMORY_MOVE_COST (mode, class2, 1));
14661 /* In case of copying from general_purpose_register we may emit multiple
14662 stores followed by single load causing memory size mismatch stall.
14663 Count this as arbitrarily high cost of 20. */
14664 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14667 /* In the case of FP/MMX moves, the registers actually overlap, and we
14668 have to switch modes in order to treat them differently. */
14669 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14670 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14676 /* Moves between SSE/MMX and integer unit are expensive. */
14677 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14678 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14679 return ix86_cost->mmxsse_to_integer;
/* Intra-unit moves: use the per-unit cost from the active cost table.  */
14680 if (MAYBE_FLOAT_CLASS_P (class1))
14681 return ix86_cost->fp_move;
14682 if (MAYBE_SSE_CLASS_P (class1))
14683 return ix86_cost->sse_move;
14684 if (MAYBE_MMX_CLASS_P (class1))
14685 return ix86_cost->mmx_move;
14689 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14691 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
14693 /* Flags and only flags can only hold CCmode values. */
14694 if (CC_REGNO_P (regno))
14695 return GET_MODE_CLASS (mode) == MODE_CC;
/* Conversely, CC/random/partial-int modes never fit ordinary regs
   (the rejecting return is on a missing line).  */
14696 if (GET_MODE_CLASS (mode) == MODE_CC
14697 || GET_MODE_CLASS (mode) == MODE_RANDOM
14698 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14700 if (FP_REGNO_P (regno))
14701 return VALID_FP_MODE_P (mode);
14702 if (SSE_REGNO_P (regno))
14703 return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
/* MMX regs accept MMX modes (incl. 3DNow! ones) only when MMX is on;
   the TARGET_MMX test itself is on a missing line.  */
14705 if (MMX_REGNO_P (regno))
14706 ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
14707 /* We handle both integer and floats in the general purpose registers.
14708 In future we should be able to handle vector modes as well. */
14709 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14711 /* Take care for QImode values - they can be in non-QI regs, but then
14712 they do cause partial register stalls. */
14713 if (regno < 4 || mode != QImode || TARGET_64BIT)
/* Allow QImode in high regs only during/after reload, or when the
   target has no partial-register stall penalty.  */
14715 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14718 /* Return the cost of moving data of mode M between a
14719 register and memory. A value of 2 is the default; this cost is
14720 relative to those in `REGISTER_MOVE_COST'.
14722 If moving between registers and memory is more expensive than
14723 between two registers, you should define this macro to express the
14726 Model also increased moving costs of QImode registers in non
/* IN selects direction: nonzero = load (memory -> register), zero =
   store.  Each unit (x87, SSE, MMX, integer) indexes its own
   load/store cost table by an `index' derived from GET_MODE_SIZE;
   the index computations and switch case labels are on missing
   lines of this extract.  */
14730 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
14732 if (FLOAT_CLASS_P (class))
14750 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14752 if (SSE_CLASS_P (class))
14755 switch (GET_MODE_SIZE (mode))
14769 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14771 if (MMX_CLASS_P (class))
14774 switch (GET_MODE_SIZE (mode))
14785 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
/* Integer classes: dispatch on byte size.  */
14787 switch (GET_MODE_SIZE (mode))
/* QImode loads from non-Q classes need movzbl and cost extra.  */
14791 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14792 : ix86_cost->movzbl_load);
14794 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14795 : ix86_cost->int_store[0] + 4);
14798 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14800 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14801 if (mode == TFmode)
/* Wide modes: one word-sized move per UNITS_PER_WORD chunk.  */
14803 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14804 * (((int) GET_MODE_SIZE (mode)
14805 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
14809 /* Compute a (partial) cost for rtx X. Return true if the complete
14810 cost has been computed, and false if subexpressions should be
14811 scanned. In either case, *TOTAL contains the cost result. */
/* TARGET_RTX_COSTS hook.  NOTE(review): the switch statement's `case'
   labels, braces, and many returns are on lines missing from this
   extract; the section comments below name the operation each visible
   fragment appears to cost, inferred from the cost fields used --
   confirm against the full source.  */
14814 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
14816 enum machine_mode mode = GET_MODE (x);
/* Constant operands: cheap if they fit the 64-bit immediate forms.  */
14824 if (TARGET_64BIT && !x86_64_sign_extended_value (x))
14826 else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
/* NOTE(review): `!GET_CODE (x) != LABEL_REF' below double-applies
   negation -- it is 1/0 compared against an enum, so the test is
   almost certainly meant to be `GET_CODE (x) != LABEL_REF'.  Flagged
   only (missing surrounding lines make an in-place fix unsafe).  */
14828 else if (flag_pic && SYMBOLIC_CONST (x)
14830 || (!GET_CODE (x) != LABEL_REF
14831 && (GET_CODE (x) != SYMBOL_REF
14832 || !SYMBOL_REF_LOCAL_P (x)))))
/* FP constants: free if an x87 push instruction exists for them.  */
14839 if (mode == VOIDmode)
14842 switch (standard_80387_constant_p (x))
14847 default: /* Other constants */
14852 /* Start with (MEM (SYMBOL_REF)), since that's where
14853 it'll probably end up. Add a penalty for size. */
14854 *total = (COSTS_N_INSNS (1)
14855 + (flag_pic != 0 && !TARGET_64BIT)
14856 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
14862 /* The zero extensions is often completely free on x86_64, so make
14863 it as cheap as possible. */
14864 if (TARGET_64BIT && mode == DImode
14865 && GET_MODE (XEXP (x, 0)) == SImode)
14867 else if (TARGET_ZERO_EXTEND_WITH_AND)
14868 *total = COSTS_N_INSNS (ix86_cost->add);
14870 *total = COSTS_N_INSNS (ix86_cost->movzx);
/* Sign extension (movsx).  */
14874 *total = COSTS_N_INSNS (ix86_cost->movsx);
/* Shift by constant; shift-by-1 can be an add, 2/3 can be an lea.  */
14878 if (GET_CODE (XEXP (x, 1)) == CONST_INT
14879 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
14881 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14884 *total = COSTS_N_INSNS (ix86_cost->add);
14887 if ((value == 2 || value == 3)
14888 && !TARGET_DECOMPOSE_LEA
14889 && ix86_cost->lea <= ix86_cost->shift_const)
14891 *total = COSTS_N_INSNS (ix86_cost->lea);
/* 32-bit DImode shifts are synthesized from SImode shift pairs.  */
14901 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
14903 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14905 if (INTVAL (XEXP (x, 1)) > 32)
14906 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
14908 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
14912 if (GET_CODE (XEXP (x, 1)) == AND)
14913 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
14915 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
14920 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14921 *total = COSTS_N_INSNS (ix86_cost->shift_const);
14923 *total = COSTS_N_INSNS (ix86_cost->shift_var);
/* MULT: FP multiply, or integer multiply costed per set bit of a
   constant multiplier (nbits counts the popcount below).  */
14928 if (FLOAT_MODE_P (mode))
14929 *total = COSTS_N_INSNS (ix86_cost->fmul);
14930 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14932 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14935 for (nbits = 0; value != 0; value >>= 1)
14938 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
14939 + nbits * ix86_cost->mult_bit);
14943 /* This is arbitrary */
14944 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
14945 + 7 * ix86_cost->mult_bit);
/* DIV/MOD.  */
14953 if (FLOAT_MODE_P (mode))
14954 *total = COSTS_N_INSNS (ix86_cost->fdiv);
14956 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
/* PLUS: recognize lea-formable (base + index*scale + disp) shapes and
   charge a single lea plus the operand costs.  */
14960 if (FLOAT_MODE_P (mode))
14961 *total = COSTS_N_INSNS (ix86_cost->fadd);
14962 else if (!TARGET_DECOMPOSE_LEA
14963 && GET_MODE_CLASS (mode) == MODE_INT
14964 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
14966 if (GET_CODE (XEXP (x, 0)) == PLUS
14967 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14968 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
14969 && CONSTANT_P (XEXP (x, 1)))
14971 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
14972 if (val == 2 || val == 4 || val == 8)
14974 *total = COSTS_N_INSNS (ix86_cost->lea);
14975 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
14976 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
14978 *total += rtx_cost (XEXP (x, 1), outer_code);
14982 else if (GET_CODE (XEXP (x, 0)) == MULT
14983 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
14985 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
14986 if (val == 2 || val == 4 || val == 8)
14988 *total = COSTS_N_INSNS (ix86_cost->lea);
14989 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
14990 *total += rtx_cost (XEXP (x, 1), outer_code);
14994 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14996 *total = COSTS_N_INSNS (ix86_cost->lea);
14997 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
14998 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
14999 *total += rtx_cost (XEXP (x, 1), outer_code);
/* MINUS (and PLUS fallthrough): FP add cost.  */
15006 if (FLOAT_MODE_P (mode))
15008 *total = COSTS_N_INSNS (ix86_cost->fadd);
/* Logical ops on 32-bit DImode: two insns, operands costed twice when
   they must be extended.  */
15016 if (!TARGET_64BIT && mode == DImode)
15018 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
15019 + (rtx_cost (XEXP (x, 0), outer_code)
15020 << (GET_MODE (XEXP (x, 0)) != DImode))
15021 + (rtx_cost (XEXP (x, 1), outer_code)
15022 << (GET_MODE (XEXP (x, 1)) != DImode)));
/* NEG: fchs for FP.  */
15028 if (FLOAT_MODE_P (mode))
15030 *total = COSTS_N_INSNS (ix86_cost->fchs);
/* NOT: two adds for 32-bit DImode, one otherwise.  */
15036 if (!TARGET_64BIT && mode == DImode)
15037 *total = COSTS_N_INSNS (ix86_cost->add * 2);
15039 *total = COSTS_N_INSNS (ix86_cost->add);
/* FLOAT_EXTEND (presumably): free under SSE math -- confirm label.  */
15043 if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
/* ABS.  */
15048 if (FLOAT_MODE_P (mode))
15049 *total = COSTS_N_INSNS (ix86_cost->fabs);
/* SQRT.  */
15053 if (FLOAT_MODE_P (mode))
15054 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
/* UNSPEC: thread-pointer reference.  */
15058 if (XINT (x, 1) == UNSPEC_TP)
15067 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
/* SVR3 constructor hook: emit `pushl $SYMBOL' into the init section so
   the ctor address is collected at startup.  PRIORITY is unused.
   (Section-switching lines are missing from this extract.)  */
15069 ix86_svr3_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
15072 fputs ("\tpushl $", asm_out_file);
15073 assemble_name (asm_out_file, XSTR (symbol, 0));
15074 fputc ('\n', asm_out_file);
/* Counter used to generate unique local labels (LPC$n / Ln$lz) for
   each emitted Mach-O symbol stub.  */
15080 static int current_machopic_label_num;
15082 /* Given a symbol name and its associated stub, write out the
15083 definition of the stub. */
/* Emits three pieces of Darwin assembly: (1) the stub itself, which
   jumps to the symbol via its lazy pointer (PIC variant computes the
   address relative to a call/pop-derived base, per the visible
   fprintf's); (2) the binder entry that pushes the lazy-pointer
   address and jumps to dyld_stub_binding_helper; (3) the lazy pointer
   itself, initialized to the binder so the first call resolves the
   symbol.  The PIC/non-PIC if/else lines are missing from this
   extract.  */
15086 machopic_output_stub (FILE *file, const char *symb, const char *stub)
15088 unsigned int length;
15089 char *binder_name, *symbol_name, lazy_ptr_name[32];
15090 int label = ++current_machopic_label_num;
15092 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
15093 symb = (*targetm.strip_name_encoding) (symb);
15095 length = strlen (stub);
15096 binder_name = alloca (length + 32);
15097 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
15099 length = strlen (symb);
15100 symbol_name = alloca (length + 32);
15101 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
15103 sprintf (lazy_ptr_name, "L%d$lz", label);
/* Pick the stub section (picsymbol vs plain) -- guard is missing.  */
15106 machopic_picsymbol_stub_section ();
15108 machopic_symbol_stub_section ();
15110 fprintf (file, "%s:\n", stub);
15111 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
/* PIC stub body: materialize PC in %eax, load lazy ptr, jump.  */
15115 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
15116 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
15117 fprintf (file, "\tjmp %%edx\n");
/* Non-PIC stub body: indirect jump through the lazy pointer.  */
15120 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
15122 fprintf (file, "%s:\n", binder_name);
15126 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
15127 fprintf (file, "\tpushl %%eax\n");
15130 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
15132 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
/* Lazy pointer, pre-pointed at the binder for first-call binding.  */
15134 machopic_lazy_symbol_ptr_section ();
15135 fprintf (file, "%s:\n", lazy_ptr_name);
15136 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15137 fprintf (file, "\t.long %s\n", binder_name);
15139 #endif /* TARGET_MACHO */
15141 /* Order the registers for register allocator. */
/* Fills reg_alloc_order[] with the preferred allocation order:
   caller-saved GPRs, callee-saved GPRs, then x87 before SSE when x87
   does the FP math (after SSE otherwise), then MMX, padding the rest
   with 0.  (Declarations of i/pos are on missing lines.)  */
15144 x86_order_regs_for_local_alloc (void)
15149 /* First allocate the local general purpose registers. */
15150 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15151 if (GENERAL_REGNO_P (i) && call_used_regs[i])
15152 reg_alloc_order [pos++] = i;
15154 /* Global general purpose registers. */
15155 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15156 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
15157 reg_alloc_order [pos++] = i;
15159 /* x87 registers come first in case we are doing FP math
15161 if (!TARGET_SSE_MATH)
15162 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15163 reg_alloc_order [pos++] = i;
15165 /* SSE registers. */
15166 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
15167 reg_alloc_order [pos++] = i;
15168 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
15169 reg_alloc_order [pos++] = i;
15171 /* x87 registers. */
15172 if (TARGET_SSE_MATH)
15173 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15174 reg_alloc_order [pos++] = i;
15176 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
15177 reg_alloc_order [pos++] = i;
15179 /* Initialize the rest of array as we do not allocate some registers
15181 while (pos < FIRST_PSEUDO_REGISTER)
15182 reg_alloc_order [pos++] = 0;
15185 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15186 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
15189 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15190 struct attribute_spec.handler. */
/* Validates placement of the attribute: it must land on a RECORD or
   UNION type (directly or via a TYPE_DECL), and must not contradict
   the opposite attribute already present.  On any violation a warning
   is issued and *no_add_attrs is set so the attribute is dropped.
   (Declaration of `type' and the final return are on missing lines.)  */
15192 ix86_handle_struct_attribute (tree *node, tree name,
15193 tree args ATTRIBUTE_UNUSED,
15194 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
15197 if (DECL_P (*node))
15199 if (TREE_CODE (*node) == TYPE_DECL)
15200 type = &TREE_TYPE (*node);
15205 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15206 || TREE_CODE (*type) == UNION_TYPE)))
15208 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
15209 *no_add_attrs = true;
/* ms_struct and gcc_struct are mutually exclusive on one type.  */
15212 else if ((is_attribute_p ("ms_struct", name)
15213 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15214 || ((is_attribute_p ("gcc_struct", name)
15215 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15217 warning ("`%s' incompatible attribute ignored",
15218 IDENTIFIER_POINTER (name));
15219 *no_add_attrs = true;
/* Return true when RECORD_TYPE should use MS bitfield layout: either
   MS layout is the target default and the type does not opt out via
   "gcc_struct", or the type explicitly requests it via "ms_struct".  */
15226 ix86_ms_bitfield_layout_p (tree record_type)
15228 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
15229 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
15230 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
15233 /* Returns an expression indicating where the this parameter is
15234 located on entry to the FUNCTION. */
15237 x86_this_parameter (tree function)
15239 tree type = TREE_TYPE (function);
/* 64-bit: `this' lands in the first integer parameter register, or
   the second when a hidden aggregate-return pointer takes the first.  */
15243 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
15244 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
/* 32-bit with regparm: `this' may be passed in a register.  */
15247 if (ix86_function_regparm (type, function) > 0)
15251 parm = TYPE_ARG_TYPES (type);
15252 /* Figure out whether or not the function has a variable number of
15254 for (; parm; parm = TREE_CHAIN (parm))
15255 if (TREE_VALUE (parm) == void_type_node)
15257 /* If not, the this parameter is in the first argument. */
/* fastcall alters which register carries the first argument.  */
15261 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
15263 return gen_rtx_REG (SImode, regno);
/* Otherwise `this' is on the stack: past the return address (4 bytes)
   plus, for aggregate returns, the hidden return-pointer slot.  */
15267 if (aggregate_value_p (TREE_TYPE (type), type))
15268 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
15270 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
15273 /* Determine whether x86_output_mi_thunk can succeed. */
/* 64-bit always works; 32-bit needs a scratch register free of
   parameter passing when VCALL_OFFSET is used or when a PIC reference
   to a non-local FUNCTION must go through the GOT.  */
15276 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
15277 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
15278 HOST_WIDE_INT vcall_offset, tree function)
15280 /* 64-bit can handle anything. */
15284 /* For 32-bit, everything's fine if we have one free register. */
15285 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
15288 /* Need a free register for vcall_offset. */
15292 /* Need a free register for GOT references. */
15293 if (flag_pic && !(*targetm.binds_local_p) (function))
15296 /* Otherwise ok. */
15300 /* Output the assembler code for a thunk function. THUNK_DECL is the
15301 declaration for the thunk function itself, FUNCTION is the decl for
15302 the target function. DELTA is an immediate constant offset to be
15303 added to THIS. If VCALL_OFFSET is nonzero, the word at
15304 *(*this + vcall_offset) should be added to THIS. */
15307 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
15308 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
15309 HOST_WIDE_INT vcall_offset, tree function)
15312 rtx this = x86_this_parameter (function);
15315 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15316 pull it in now and let DELTA benefit. */
15319 else if (vcall_offset)
15321 /* Put the this parameter into %eax. */
15323 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15324 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15327 this_reg = NULL_RTX;
15329 /* Adjust the this parameter by a fixed constant. */
15332 xops[0] = GEN_INT (delta);
15333 xops[1] = this_reg ? this_reg : this;
/* On 64-bit, DELTA may not fit an add immediate; stage it through
   R10, which is neither a parameter register nor callee-saved.  */
15336 if (!x86_64_general_operand (xops[0], DImode))
15338 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15340 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15344 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15347 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15350 /* Adjust the this parameter by a value stored in the vtable. */
15354 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
/* 32-bit scratch: ECX normally, but EAX for fastcall targets
   (fastcall passes arguments in ECX).  */
15357 int tmp_regno = 2 /* ECX */;
15358 if (lookup_attribute ("fastcall",
15359 TYPE_ATTRIBUTES (TREE_TYPE (function))))
15360 tmp_regno = 0 /* EAX */;
15361 tmp = gen_rtx_REG (SImode, tmp_regno);
/* Load the vtable pointer: tmp = *this.  */
15364 xops[0] = gen_rtx_MEM (Pmode, this_reg);
15367 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15369 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15371 /* Adjust the this parameter. */
15372 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
/* A VCALL_OFFSET too large for a displacement needs an index
   register; R11 is used as the second scratch.  */
15373 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15375 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15376 xops[0] = GEN_INT (vcall_offset);
15378 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15379 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
15381 xops[1] = this_reg;
15383 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15385 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15388 /* If necessary, drop THIS back to its stack slot. */
15389 if (this_reg && this_reg != this)
15391 xops[0] = this_reg;
15393 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
/* Finally, tail-call the real target FUNCTION.  */
15396 xops[0] = XEXP (DECL_RTL (function), 0);
15399 if (!flag_pic || (*targetm.binds_local_p) (function))
15400 output_asm_insn ("jmp\t%P0", xops);
/* 64-bit PIC to a non-local symbol: jump through the GOT entry.  */
15403 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
15404 tmp = gen_rtx_CONST (Pmode, tmp);
15405 tmp = gen_rtx_MEM (QImode, tmp);
15407 output_asm_insn ("jmp\t%A0", xops);
15412 if (!flag_pic || (*targetm.binds_local_p) (function))
15413 output_asm_insn ("jmp\t%P0", xops);
/* Darwin (Mach-O) PIC: jump via the machopic symbol stub.  */
15418 char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
15419 tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
15420 tmp = gen_rtx_MEM (QImode, tmp);
15422 output_asm_insn ("jmp\t%0", xops);
15425 #endif /* TARGET_MACHO */
/* 32-bit ELF PIC: set up the GOT pointer in ECX, then do an
   indirect jump through the function's GOT slot.  */
15427 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15428 output_set_got (tmp);
15431 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15432 output_asm_insn ("jmp\t{*}%1", xops);
/* Implement TARGET_ASM_FILE_START: emit the generic file prologue,
   then any target-specific directives (.version, __fltused global,
   .intel_syntax) that the configuration requests.  */
15438 x86_file_start (void)
15440 default_file_start ();
15441 if (X86_FILE_START_VERSION_DIRECTIVE)
15442 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
15443 if (X86_FILE_START_FLTUSED)
15444 fputs ("\t.global\t__fltused\n", asm_out_file);
15445 if (ix86_asm_dialect == ASM_INTEL)
15446 fputs ("\t.intel_syntax\n", asm_out_file);
/* Compute the alignment for FIELD, given the otherwise-chosen
   alignment COMPUTED.  On 32-bit targets without -malign-double,
   double / integer-class fields are capped at 32-bit alignment
   (matching the traditional i386 struct layout).  */
15450 x86_field_alignment (tree field, int computed)
15452 enum machine_mode mode;
15453 tree type = TREE_TYPE (field);
/* 64-bit and -malign-double keep the computed alignment (the early
   return here is elided in this excerpt).  */
15455 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
/* For arrays the innermost element type's mode decides.  */
15457 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15458 ? get_inner_array_type (type) : type);
15459 if (mode == DFmode || mode == DCmode
15460 || GET_MODE_CLASS (mode) == MODE_INT
15461 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
15462 return MIN (32, computed);
15466 /* Output assembler code to FILE to increment profiler label # LABELNO
15467 for profiling a function entry. */
15469 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
/* 64-bit PIC flavor (the selecting conditionals are elided in this
   excerpt): counter address in %r11, mcount called through the GOT.  */
15474 #ifndef NO_PROFILE_COUNTERS
15475 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
15477 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
/* 64-bit non-PIC: absolute counter address, direct call.  */
15481 #ifndef NO_PROFILE_COUNTERS
15482 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15484 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
/* 32-bit PIC: counter via @GOTOFF off %ebx, call through the GOT.  */
15488 #ifndef NO_PROFILE_COUNTERS
15489 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15490 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15492 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
/* 32-bit non-PIC: absolute counter address, direct call.  */
15496 #ifndef NO_PROFILE_COUNTERS
15497 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
15498 PROFILE_COUNT_REGISTER);
15500 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15504 /* We don't have exact information about the insn sizes, but we may assume
15505 quite safely that we are informed about all 1 byte insns and memory
15506 address sizes. This is enough to eliminate unnecessary padding in
/* Return a conservative lower bound on INSN's encoded length in bytes
   (used by the K8 jump-padding pass below).  */
15510 min_insn_size (rtx insn)
/* Non-insns and inactive insns contribute no bytes.  */
15514 if (!INSN_P (insn) || !active_insn_p (insn))
15517 /* Discard alignments we've emit and jump instructions. */
15518 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
15519 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
/* Jump tables are data, not executed instruction bytes.  */
15521 if (GET_CODE (insn) == JUMP_INSN
15522 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
15523 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
15526 /* Important case - calls are always 5 bytes.
15527 It is common to have many calls in the row. */
15528 if (GET_CODE (insn) == CALL_INSN
15529 && symbolic_reference_mentioned_p (PATTERN (insn))
15530 && !SIBLING_CALL_P (insn))
15532 if (get_attr_length (insn) <= 1)
15535 /* For normal instructions we may rely on the sizes of addresses
15536 and the presence of symbol to require 4 bytes of encoding.
15537 This is not the case for jumps where references are PC relative. */
15538 if (GET_CODE (insn) != JUMP_INSN)
15540 l = get_attr_length_address (insn);
15541 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
15550 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
/* Sliding-window pass: pads the insn stream with p2align so no 16-byte
   window ever contains four or more jumps/calls.  */
15554 k8_avoid_jump_misspredicts (void)
15556 rtx insn, start = get_insns ();
15557 int nbytes = 0, njumps = 0;
15560 /* Look for all minimal intervals of instructions containing 4 jumps.
15561 The intervals are bounded by START and INSN. NBYTES is the total
15562 size of instructions in the interval including INSN and not including
15563 START. When the NBYTES is smaller than 16 bytes, it is possible
15564 that the end of START and INSN ends up in the same 16byte page.
15566 The smallest offset in the page INSN can start is the case where START
15567 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
15568 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
/* Main walk: grow the window at INSN, then shrink it from START.  */
15570 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
15573 nbytes += min_insn_size (insn);
15575 fprintf(rtl_dump_file, "Insn %i estimated to %i bytes\n",
15576 INSN_UID (insn), min_insn_size (insn));
/* Count real jumps/calls; jump tables (ADDR_VEC) are excluded.  */
15577 if ((GET_CODE (insn) == JUMP_INSN
15578 && GET_CODE (PATTERN (insn)) != ADDR_VEC
15579 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
15580 || GET_CODE (insn) == CALL_INSN
15587 start = NEXT_INSN (start);
/* Dropping a jump/call off the front of the window; remember that
   the window boundary was a jump.  */
15588 if ((GET_CODE (start) == JUMP_INSN
15589 && GET_CODE (PATTERN (start)) != ADDR_VEC
15590 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
15591 || GET_CODE (start) == CALL_INSN)
15592 njumps--, isjump = 1;
15595 nbytes -= min_insn_size (start);
15600 fprintf(rtl_dump_file, "Interval %i to %i has %i bytes\n",
15601 INSN_UID (start), INSN_UID (insn), nbytes);
/* A fourth jump would fall in the same 16-byte window: pad so INSN
   starts in a fresh window.  */
15603 if (njumps == 3 && isjump && nbytes < 16)
15605 int padsize = 15 - nbytes + min_insn_size (insn);
15608 fprintf (rtl_dump_file, "Padding insn %i by %i bytes!\n", INSN_UID (insn), padsize);
15609 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
15614 /* Implement machine specific optimizations.
15615 At the moment we implement single transformation: AMD Athlon works faster
15616 when RET is not destination of conditional jump or directly preceded
15617 by other jump instruction. We avoid the penalty by inserting NOP just
15618 before the RET instructions in such cases. */
/* NOTE(review): the function header is elided in this excerpt; this
   appears to be the body of the machine-dependent reorg pass.  It
   walks every predecessor of the exit block looking for RET insns
   that need the workaround, then runs the K8 jump-padding pass.  */
15624 if (!TARGET_ATHLON_K8 || !optimize || optimize_size)
15626 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
15628 basic_block bb = e->src;
15631 bool replace = false;
/* Only hot blocks ending in a bare RETURN are of interest.  */
15633 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
15634 || !maybe_hot_bb_p (bb))
/* Find the nearest preceding active insn or label.  */
15636 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15637 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
/* RET preceded by a label: check whether any non-fallthru edge
   (i.e. a jump) targets it.  */
15639 if (prev && GET_CODE (prev) == CODE_LABEL)
15642 for (e = bb->pred; e; e = e->pred_next)
15643 if (EDGE_FREQUENCY (e) && e->src->index >= 0
15644 && !(e->flags & EDGE_FALLTHRU))
/* RET directly preceded by a conditional jump or call.  */
15649 prev = prev_active_insn (ret);
15651 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
15652 || GET_CODE (prev) == CALL_INSN))
15654 /* Empty functions get branch mispredict even when the jump destination
15655 is not visible to us. */
15656 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
/* Replace with the longer RET form (emitted just before RET).  */
15661 emit_insn_before (gen_return_internal_long (), ret);
15665 k8_avoid_jump_misspredicts ();
15668 /* Return nonzero when QImode register that must be represented via REX prefix
/* Scans the cached recog operands of INSN; hard registers numbered
   >= 4 are the ones needing a REX prefix for byte access.
   NOTE(review): any mode filtering appears elided in this excerpt.  */
15671 x86_extended_QIreg_mentioned_p (rtx insn)
15674 extract_insn_cached (insn);
15675 for (i = 0; i < recog_data.n_operands; i++)
15676 if (REG_P (recog_data.operand[i])
15677 && REGNO (recog_data.operand[i]) >= 4)
15682 /* Return nonzero when P points to register encoded via REX prefix.
15683 Called via for_each_rtx. */
/* Helper for x86_extended_reg_mentioned_p: true for R8-R15 and
   XMM8-XMM15 register rtxes.  */
15685 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
15687 unsigned int regno;
15690 regno = REGNO (*p);
15691 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15694 /* Return true when INSN mentions register that must be encoded using REX
/* Walks INSN's pattern with the predicate above.  */
15697 x86_extended_reg_mentioned_p (rtx insn)
15699 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15702 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
15703 optabs would emit if we didn't have TFmode patterns. */
15706 x86_emit_floatuns (rtx operands[2])
15708 rtx neglab, donelab, i0, i1, f0, in, out;
15709 enum machine_mode mode, inmode;
/* Only SImode and DImode inputs are supported.  */
15711 inmode = GET_MODE (operands[1]);
15712 if (inmode != SImode
15713 && inmode != DImode)
15717 in = force_reg (inmode, operands[1]);
15718 mode = GET_MODE (out);
15719 neglab = gen_label_rtx ();
15720 donelab = gen_label_rtx ();
15721 i1 = gen_reg_rtx (Pmode);
15722 f0 = gen_reg_rtx (mode);
/* If the sign bit is clear, a plain signed conversion is correct.  */
15724 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
15726 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15727 emit_jump_insn (gen_jump (donelab));
15730 emit_label (neglab);
/* Sign bit set: halve the value, folding the low bit back in so
   rounding is preserved, convert, then double the result.  */
15732 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15733 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15734 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15735 expand_float (f0, i0, 0);
15736 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15738 emit_label (donelab);
15741 /* Return if we do not know how to pass TYPE solely in registers. */
/* In addition to the generic cases, 32-bit targets cannot pass TImode
   values of a type in registers.  */
15743 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
15745 if (default_must_pass_in_stack (mode, type))
15747 return (!TARGET_64BIT && type && mode == TImode);
15750 #include "gt-i386.h"