/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "basic-block.h"
#include "target-def.h"
#include "langhooks.h"

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)			\
  ((mode) == QImode ? 0				\
   : (mode) == HImode ? 1			\
   : (mode) == SImode ? 2			\
   : (mode) == DImode ? 3			\
   : 4)
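
/* A usage sketch (assuming, for illustration, that the multiply startup
   costs below are stored in a `mult_init' member of struct
   processor_costs): the per-mode cost arrays have five entries, indexed
   QImode/HImode/SImode/DImode/TImode, so the SImode multiply cost on the
   active processor would be fetched as

     ix86_cost->mult_init[MODE_INDEX (SImode)];  */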
/* Processor costs (relative to an add) */
struct processor_costs size_cost = {	/* costs for tuning for size */
  2,			/* cost of an add instruction */
  3,			/* cost of a lea instruction */
  2,			/* variable shift costs */
  3,			/* constant shift costs */
  {3, 3, 3, 3, 5},	/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  {3, 3, 3, 3, 5},	/* cost of a divide/mod */
  3,			/* cost of movsx */
  3,			/* cost of movzx */
  2,			/* cost for loading QImode using movzbl */
  {2, 2, 2},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 2, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {2, 2, 2},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {2, 2, 2},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  3,			/* cost of moving MMX register */
  {3, 3},		/* cost of loading MMX registers
			   in SImode and DImode */
  {3, 3},		/* cost of storing MMX registers
			   in SImode and DImode */
  3,			/* cost of moving SSE register */
  {3, 3, 3},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {3, 3, 3},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  3,			/* MMX or SSE register to integer */
  0,			/* size of prefetch block */
  0,			/* number of parallel prefetches */
  2,			/* cost of FADD and FSUB insns.  */
  2,			/* cost of FMUL instruction.  */
  2,			/* cost of FDIV instruction.  */
  2,			/* cost of FABS instruction.  */
  2,			/* cost of FCHS instruction.  */
  2,			/* cost of FSQRT instruction.  */
};
/* Processor costs (relative to an add) */
struct processor_costs i386_cost = {	/* 386 specific costs */
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  3,			/* variable shift costs */
  2,			/* constant shift costs */
  {6, 6, 6, 6, 6},	/* cost of starting a multiply */
  1,			/* cost of multiply per each bit set */
  {23, 23, 23, 23, 23},	/* cost of a divide/mod */
  3,			/* cost of movsx */
  2,			/* cost of movzx */
  15,			/* "large" insn */
  4,			/* cost for loading QImode using movzbl */
  {2, 4, 2},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 4, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {8, 8, 8},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {8, 8, 8},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {4, 8},		/* cost of loading MMX registers
			   in SImode and DImode */
  {4, 8},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {4, 8, 16},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {4, 8, 16},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  3,			/* MMX or SSE register to integer */
  0,			/* size of prefetch block */
  0,			/* number of parallel prefetches */
  23,			/* cost of FADD and FSUB insns.  */
  27,			/* cost of FMUL instruction.  */
  88,			/* cost of FDIV instruction.  */
  22,			/* cost of FABS instruction.  */
  24,			/* cost of FCHS instruction.  */
  122,			/* cost of FSQRT instruction.  */
};
struct processor_costs i486_cost = {	/* 486 specific costs */
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  3,			/* variable shift costs */
  2,			/* constant shift costs */
  {12, 12, 12, 12, 12},	/* cost of starting a multiply */
  1,			/* cost of multiply per each bit set */
  {40, 40, 40, 40, 40},	/* cost of a divide/mod */
  3,			/* cost of movsx */
  2,			/* cost of movzx */
  15,			/* "large" insn */
  4,			/* cost for loading QImode using movzbl */
  {2, 4, 2},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 4, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {8, 8, 8},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {8, 8, 8},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {4, 8},		/* cost of loading MMX registers
			   in SImode and DImode */
  {4, 8},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {4, 8, 16},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {4, 8, 16},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  3,			/* MMX or SSE register to integer */
  0,			/* size of prefetch block */
  0,			/* number of parallel prefetches */
  8,			/* cost of FADD and FSUB insns.  */
  16,			/* cost of FMUL instruction.  */
  73,			/* cost of FDIV instruction.  */
  3,			/* cost of FABS instruction.  */
  3,			/* cost of FCHS instruction.  */
  83,			/* cost of FSQRT instruction.  */
};
struct processor_costs pentium_cost = {
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  4,			/* variable shift costs */
  1,			/* constant shift costs */
  {11, 11, 11, 11, 11},	/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  {25, 25, 25, 25, 25},	/* cost of a divide/mod */
  3,			/* cost of movsx */
  2,			/* cost of movzx */
  8,			/* "large" insn */
  6,			/* cost for loading QImode using movzbl */
  {2, 4, 2},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 4, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {2, 2, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 6},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  8,			/* cost of moving MMX register */
  {8, 8},		/* cost of loading MMX registers
			   in SImode and DImode */
  {8, 8},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {4, 8, 16},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {4, 8, 16},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  3,			/* MMX or SSE register to integer */
  0,			/* size of prefetch block */
  0,			/* number of parallel prefetches */
  3,			/* cost of FADD and FSUB insns.  */
  3,			/* cost of FMUL instruction.  */
  39,			/* cost of FDIV instruction.  */
  1,			/* cost of FABS instruction.  */
  1,			/* cost of FCHS instruction.  */
  70,			/* cost of FSQRT instruction.  */
};
struct processor_costs pentiumpro_cost = {
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  1,			/* variable shift costs */
  1,			/* constant shift costs */
  {4, 4, 4, 4, 4},	/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  {17, 17, 17, 17, 17},	/* cost of a divide/mod */
  1,			/* cost of movsx */
  1,			/* cost of movzx */
  8,			/* "large" insn */
  2,			/* cost for loading QImode using movzbl */
  {4, 4, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 2, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {2, 2, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 6},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {2, 2},		/* cost of loading MMX registers
			   in SImode and DImode */
  {2, 2},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {2, 2, 8},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {2, 2, 8},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  3,			/* MMX or SSE register to integer */
  32,			/* size of prefetch block */
  6,			/* number of parallel prefetches */
  3,			/* cost of FADD and FSUB insns.  */
  5,			/* cost of FMUL instruction.  */
  56,			/* cost of FDIV instruction.  */
  2,			/* cost of FABS instruction.  */
  2,			/* cost of FCHS instruction.  */
  56,			/* cost of FSQRT instruction.  */
};
struct processor_costs k6_cost = {
  1,			/* cost of an add instruction */
  2,			/* cost of a lea instruction */
  1,			/* variable shift costs */
  1,			/* constant shift costs */
  {3, 3, 3, 3, 3},	/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  {18, 18, 18, 18, 18},	/* cost of a divide/mod */
  2,			/* cost of movsx */
  2,			/* cost of movzx */
  8,			/* "large" insn */
  3,			/* cost for loading QImode using movzbl */
  {4, 5, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 3, 2},		/* cost of storing integer registers */
  4,			/* cost of reg,reg fld/fst */
  {6, 6, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 4},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {2, 2},		/* cost of loading MMX registers
			   in SImode and DImode */
  {2, 2},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {2, 2, 8},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {2, 2, 8},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  6,			/* MMX or SSE register to integer */
  32,			/* size of prefetch block */
  1,			/* number of parallel prefetches */
  2,			/* cost of FADD and FSUB insns.  */
  2,			/* cost of FMUL instruction.  */
  56,			/* cost of FDIV instruction.  */
  2,			/* cost of FABS instruction.  */
  2,			/* cost of FCHS instruction.  */
  56,			/* cost of FSQRT instruction.  */
};
struct processor_costs athlon_cost = {
  1,			/* cost of an add instruction */
  2,			/* cost of a lea instruction */
  1,			/* variable shift costs */
  1,			/* constant shift costs */
  {5, 5, 5, 5, 5},	/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  {18, 26, 42, 74, 74},	/* cost of a divide/mod */
  1,			/* cost of movsx */
  1,			/* cost of movzx */
  8,			/* "large" insn */
  4,			/* cost for loading QImode using movzbl */
  {3, 4, 3},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {3, 4, 3},		/* cost of storing integer registers */
  4,			/* cost of reg,reg fld/fst */
  {4, 4, 12},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {6, 6, 8},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {4, 4},		/* cost of loading MMX registers
			   in SImode and DImode */
  {4, 4},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {4, 4, 6},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {4, 4, 5},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  5,			/* MMX or SSE register to integer */
  64,			/* size of prefetch block */
  6,			/* number of parallel prefetches */
  4,			/* cost of FADD and FSUB insns.  */
  4,			/* cost of FMUL instruction.  */
  24,			/* cost of FDIV instruction.  */
  2,			/* cost of FABS instruction.  */
  2,			/* cost of FCHS instruction.  */
  35,			/* cost of FSQRT instruction.  */
};
struct processor_costs k8_cost = {
  1,			/* cost of an add instruction */
  2,			/* cost of a lea instruction */
  1,			/* variable shift costs */
  1,			/* constant shift costs */
  {3, 4, 3, 4, 5},	/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  {18, 26, 42, 74, 74},	/* cost of a divide/mod */
  1,			/* cost of movsx */
  1,			/* cost of movzx */
  8,			/* "large" insn */
  4,			/* cost for loading QImode using movzbl */
  {3, 4, 3},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {3, 4, 3},		/* cost of storing integer registers */
  4,			/* cost of reg,reg fld/fst */
  {4, 4, 12},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {6, 6, 8},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {3, 3},		/* cost of loading MMX registers
			   in SImode and DImode */
  {4, 4},		/* cost of storing MMX registers
			   in SImode and DImode */
  2,			/* cost of moving SSE register */
  {4, 3, 6},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {4, 4, 5},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  5,			/* MMX or SSE register to integer */
  64,			/* size of prefetch block */
  6,			/* number of parallel prefetches */
  4,			/* cost of FADD and FSUB insns.  */
  4,			/* cost of FMUL instruction.  */
  19,			/* cost of FDIV instruction.  */
  2,			/* cost of FABS instruction.  */
  2,			/* cost of FCHS instruction.  */
  35,			/* cost of FSQRT instruction.  */
};
struct processor_costs pentium4_cost = {
  1,			/* cost of an add instruction */
  1,			/* cost of a lea instruction */
  4,			/* variable shift costs */
  4,			/* constant shift costs */
  {15, 15, 15, 15, 15},	/* cost of starting a multiply */
  0,			/* cost of multiply per each bit set */
  {56, 56, 56, 56, 56},	/* cost of a divide/mod */
  1,			/* cost of movsx */
  1,			/* cost of movzx */
  16,			/* "large" insn */
  2,			/* cost for loading QImode using movzbl */
  {4, 5, 4},		/* cost of loading integer registers
			   in QImode, HImode and SImode.
			   Relative to reg-reg move (2).  */
  {2, 3, 2},		/* cost of storing integer registers */
  2,			/* cost of reg,reg fld/fst */
  {2, 2, 6},		/* cost of loading fp registers
			   in SFmode, DFmode and XFmode */
  {4, 4, 6},		/* cost of storing fp registers
			   in SFmode, DFmode and XFmode */
  2,			/* cost of moving MMX register */
  {2, 2},		/* cost of loading MMX registers
			   in SImode and DImode */
  {2, 2},		/* cost of storing MMX registers
			   in SImode and DImode */
  12,			/* cost of moving SSE register */
  {12, 12, 12},		/* cost of loading SSE registers
			   in SImode, DImode and TImode */
  {2, 2, 8},		/* cost of storing SSE registers
			   in SImode, DImode and TImode */
  10,			/* MMX or SSE register to integer */
  64,			/* size of prefetch block */
  6,			/* number of parallel prefetches */
  5,			/* cost of FADD and FSUB insns.  */
  7,			/* cost of FMUL instruction.  */
  43,			/* cost of FDIV instruction.  */
  2,			/* cost of FABS instruction.  */
  2,			/* cost of FCHS instruction.  */
  43,			/* cost of FSQRT instruction.  */
};
const struct processor_costs *ix86_cost = &pentium_cost;

/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
const int x86_3dnow_a = m_ATHLON_K8;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
const int x86_branch_hints = m_PENT4;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
const int x86_single_stringop = m_386 | m_PENT4;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
const int x86_decompose_lea = m_PENT4;
const int x86_shift1 = ~m_486;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
/* Set for machines where the type and dependencies are resolved on SSE
   register parts instead of whole registers, so we may maintain just the
   lower part of scalar values in the proper format, leaving the upper
   part undefined.  */
const int x86_sse_partial_regs = m_ATHLON_K8;
/* Athlon optimizes the partial-register FPS special case, thus avoiding
   the need for extra instructions beforehand.  */
const int x86_sse_partial_regs_for_cvtsd2ss = 0;
const int x86_sse_typeless_stores = m_ATHLON_K8;
const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
const int x86_use_ffreep = m_ATHLON_K8;
const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
const int x86_inter_unit_moves = ~(m_ATHLON_K8);
const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_PPRO;
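
/* A sketch of how these bitmasks are consumed (mirroring tests that
   appear later in this file, e.g. x86_accumulate_outgoing_args & TUNEMASK
   and x86_arch_always_fancy_math_387 & (1 << ix86_arch)): each mask is
   tested against the bit of the processor selected by -mtune or -march,
   as in

     if (x86_use_leave & (1 << ix86_tune))
       ... prefer the "leave" instruction in the epilogue ...  */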
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20

/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;

/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */
enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  AREG, DREG, CREG, BREG,
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
564 /* The "default" register map used in 32bit mode. */
566 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
568 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
569 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
570 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
571 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
572 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
573 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
574 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
static int const x86_64_int_parameter_registers[6] =
{
  5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
};

static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};
588 /* The "default" register map used in 64bit mode. */
589 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
591 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
592 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
593 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
594 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
595 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
596 8,9,10,11,12,13,14,15, /* extended integer registers */
597 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8  for %eip    (no gcc equivalent)
	9  for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;

#define MAX_386_STACK_LOCALS 3
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
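
/* A worked example, assuming the 64-bit ABI values REGPARM_MAX == 6,
   SSE_REGPARM_MAX == 8 and UNITS_PER_WORD == 8: the register save area
   is 6 * 8 + 8 * 16 = 176 bytes, covering the six integer and eight SSE
   argument registers a varargs function may receive.  */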
/* Define the structure for the machine field in struct function.  */

struct stack_local_entry GTY(())
{
  unsigned short mode;
  unsigned short n;
  rtx rtl;
  struct stack_local_entry *next;
};

/* Structure describing stack frame layout.
   Stack grows downward:

   ...
   saved frame pointer if frame_pointer_needed
					      <- HARD_FRAME_POINTER
   ...
     > to_allocate			      <- FRAME_POINTER
   ...  */

struct ix86_frame
{
  int outgoing_arguments_size;

  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;

  /* When save_regs_using_mov is set, emit prologue using
     move instead of push instructions.  */
  bool save_regs_using_mov;
};
/* Used to enable/disable debugging features.  */
const char *ix86_debug_arg_string, *ix86_debug_addr_string;
/* Code model option as passed by user.  */
const char *ix86_cmodel_string;
enum cmodel ix86_cmodel;
const char *ix86_asm_string;
enum asm_dialect ix86_asm_dialect = ASM_ATT;
const char *ix86_tls_dialect_string;
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which CPU we are scheduling for.  */
enum processor_type ix86_tune;
/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_tune_string;		/* for -mtune=<xxx> */
const char *ix86_arch_string;		/* for -march=<xxx> */
const char *ix86_fpmath_string;		/* for -mfpmath=<xxx> */

/* # of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* True if the SSE prefetch instruction is not a NOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number */
static int ix86_regparm;

/* Alignment to use for loops and jumps: */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
const char *ix86_align_funcs_string;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
static char internal_label_prefix[16];
static int internal_label_prefix_len;
static int local_symbolic_operand (rtx, enum machine_mode);
static int tls_symbolic_operand_1 (rtx, enum tls_model);
static void output_pic_addr_const (FILE *, rtx, int);
static void put_condition_code (enum rtx_code, enum machine_mode,
				int, FILE *);
static const char *get_some_local_dynamic_name (void);
static int get_some_local_dynamic_name_1 (rtx *, void *);
static rtx maybe_get_pool_constant (rtx);
static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
						   rtx *);
static rtx get_thread_pointer (int);
static rtx legitimize_tls_address (rtx, enum tls_model, int);
static void get_pc_thunk_name (char [32], unsigned int);
static rtx gen_push (rtx);
static int memory_address_length (rtx addr);
static int ix86_flags_dependant (rtx, rtx, enum attr_type);
static int ix86_agi_dependant (rtx, rtx, enum attr_type);
static enum attr_ppro_uops ix86_safe_ppro_uops (rtx);
static void ix86_dump_ppro_packet (FILE *);
static void ix86_reorder_insn (rtx *, rtx *);
static struct machine_function * ix86_init_machine_status (void);
static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
static int ix86_nsaved_regs (void);
static void ix86_emit_save_regs (void);
static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void ix86_set_move_mem_attrs_1 (rtx, rtx, rtx, rtx, rtx);
static void ix86_sched_reorder_ppro (rtx *, rtx *);
static HOST_WIDE_INT ix86_GOT_alias_set (void);
static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
static rtx ix86_expand_aligntest (rtx, int);
static void ix86_expand_strlensi_unroll_1 (rtx, rtx);
static int ix86_issue_rate (void);
static int ix86_adjust_cost (rtx, rtx, rtx, int);
static void ix86_sched_init (FILE *, int, int);
static int ix86_sched_reorder (FILE *, int, rtx *, int *, int);
static int ix86_variable_issue (FILE *, int, rtx, int);
static int ia32_use_dfa_pipeline_interface (void);
static int ia32_multipass_dfa_lookahead (void);
static void ix86_init_mmx_sse_builtins (void);
static rtx x86_this_parameter (tree);
static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				 HOST_WIDE_INT, tree);
static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
static void x86_file_start (void);
static void ix86_reorg (void);
static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
static tree ix86_build_builtin_va_list (void);

struct ix86_address
{
  rtx base, index, disp;
  enum ix86_address_seg { SEG_DEFAULT, SEG_FS, SEG_GS } seg;
};

static int ix86_decompose_address (rtx, struct ix86_address *);
static int ix86_address_cost (rtx);
static bool ix86_cannot_force_const_mem (rtx);
static rtx ix86_delegitimize_address (rtx);

struct builtin_description;
static rtx ix86_expand_sse_comi (const struct builtin_description *,
				 tree, rtx);
static rtx ix86_expand_sse_compare (const struct builtin_description *,
				    tree, rtx);
static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx ix86_expand_store_builtin (enum insn_code, tree);
static rtx safe_vector_operand (rtx, enum machine_mode);
static enum rtx_code ix86_fp_compare_code_to_integer (enum rtx_code);
static void ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *,
				      enum rtx_code *, enum rtx_code *);
static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
static int ix86_fp_comparison_cost (enum rtx_code code);
static unsigned int ix86_select_alt_pic_regnum (void);
static int ix86_save_reg (unsigned int, int);
static void ix86_compute_frame_layout (struct ix86_frame *);
static int ix86_comp_type_attributes (tree, tree);
static int ix86_function_regparm (tree, tree);
const struct attribute_spec ix86_attribute_table[];
static bool ix86_function_ok_for_sibcall (tree, tree);
static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
static int ix86_value_regno (enum machine_mode);
static bool contains_128bit_aligned_vector_p (tree);
static bool ix86_ms_bitfield_layout_p (tree);
static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
static int extended_reg_mentioned_1 (rtx *, void *);
static bool ix86_rtx_costs (rtx, int, int, int *);
static int min_insn_size (rtx);
static void k8_avoid_jump_misspredicts (void);

#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
static void ix86_svr3_asm_out_constructor (rtx, int);
#endif
/* Register class used for passing a given 64-bit part of the argument.
   These represent classes as documented by the psABI, with the exception
   of the SSESF and SSEDF classes, which are basically the SSE class,
   except that gcc will use SF or DFmode moves instead of DImode moves to
   avoid reformatting penalties.

   Similarly we play games with the INTEGERSI_CLASS to use cheaper SImode
   moves whenever possible (the upper half then contains only padding).  */
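
/* An illustrative classification under the rules above (a hedged
   example, not something emitted by this file): the 16-byte struct

     struct { double d; int i; }

   has its first eightbyte classified as SSEDF (only the double lives
   there) and its second as INTEGERSI (the int plus tail padding), so it
   is passed in one SSE register and one general-purpose register.  */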
enum x86_64_reg_class
{
  X86_64_NO_CLASS,
  X86_64_INTEGER_CLASS,
  X86_64_INTEGERSI_CLASS,
  X86_64_SSE_CLASS,
  X86_64_SSESF_CLASS,
  X86_64_SSEDF_CLASS,
  X86_64_SSEUP_CLASS,
  X86_64_X87_CLASS,
  X86_64_X87UP_CLASS,
  X86_64_MEMORY_CLASS
};
static const char * const x86_64_reg_class_name[] =
  {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};

#define MAX_CLASSES 4
static int classify_argument (enum machine_mode, tree,
			      enum x86_64_reg_class [MAX_CLASSES], int);
static int examine_argument (enum machine_mode, tree, int, int *, int *);
static rtx construct_container (enum machine_mode, tree, int, int, int,
				const int *, int);
static enum x86_64_reg_class merge_classes (enum x86_64_reg_class,
					    enum x86_64_reg_class);

/* Table of constants used by fldpi, fldln2, etc....  */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
static bool ext_80387_constants_init = 0;
static void init_ext_80387_constants (void);
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ix86_sched_init
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_sched_reorder
#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
  ia32_use_dfa_pipeline_interface
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
struct gcc_target targetm = TARGET_INITIALIZER;

/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  */
#ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1
#endif
/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been processed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */

void
override_options (void)
{
  int i;

  /* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16
  static struct ptt
    {
      const struct processor_costs *cost;	/* Processor costs */
      const int target_enable;			/* Target flags to enable.  */
      const int target_disable;			/* Target flags to disable.  */
      const int align_loop;			/* Default alignments.  */
      const int align_loop_max_skip;
      const int align_jump;
      const int align_jump_max_skip;
      const int align_func;
    }
  const processor_target_table[PROCESSOR_max] =
    {
      {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
      {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
      {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
      {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
      {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
      {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
      {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
      {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
    };
  static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
  static struct pta
    {
      const char *const name;		/* processor name or nickname.  */
      const enum processor_type processor;
      const enum pta_flags
	{
	  PTA_SSE = 1,
	  PTA_SSE2 = 2,
	  PTA_MMX = 4,
	  PTA_PREFETCH_SSE = 8,
	  PTA_3DNOW = 16,
	  PTA_3DNOW_A = 64,
	  PTA_64BIT = 128
	} flags;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386, 0},
      {"i486", PROCESSOR_I486, 0},
      {"i586", PROCESSOR_PENTIUM, 0},
      {"pentium", PROCESSOR_PENTIUM, 0},
      {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
      {"winchip-c6", PROCESSOR_I486, PTA_MMX},
      {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
      {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
      {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
      {"i686", PROCESSOR_PENTIUMPRO, 0},
      {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
      {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
      {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
      {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
				       PTA_MMX | PTA_PREFETCH_SSE},
      {"k6", PROCESSOR_K6, PTA_MMX},
      {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				   | PTA_3DNOW_A},
      {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
					 | PTA_3DNOW | PTA_3DNOW_A},
      {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				     | PTA_3DNOW_A | PTA_SSE},
      {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				      | PTA_3DNOW_A | PTA_SSE},
      {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				      | PTA_3DNOW_A | PTA_SSE},
      {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
			   | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
      {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
				| PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
      {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
				 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
      {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
				  | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
    };

  int const pta_size = ARRAY_SIZE (processor_alias_table);
  /* Set the default values for switches whose default depends on TARGET_64BIT
     in case they weren't overwritten by command line options.  */
  if (TARGET_64BIT)
    {
      if (flag_omit_frame_pointer == 2)
	flag_omit_frame_pointer = 1;
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = 1;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = 0;
    }
  else
    {
      if (flag_omit_frame_pointer == 2)
	flag_omit_frame_pointer = 0;
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = 0;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
    }
#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  if (!ix86_tune_string && ix86_arch_string)
    ix86_tune_string = ix86_arch_string;
  if (!ix86_tune_string)
    ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
  if (!ix86_arch_string)
    ix86_arch_string = TARGET_64BIT ? "k8" : "i386";
  if (ix86_cmodel_string != 0)
    {
      if (!strcmp (ix86_cmodel_string, "small"))
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else if (flag_pic)
	sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
      else if (!strcmp (ix86_cmodel_string, "32"))
	ix86_cmodel = CM_32;
      else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
	ix86_cmodel = CM_KERNEL;
      else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
	ix86_cmodel = CM_MEDIUM;
      else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
	ix86_cmodel = CM_LARGE;
      else
	error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
    }
  else
    {
      ix86_cmodel = CM_32;
      if (TARGET_64BIT)
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
    }
  if (ix86_asm_string != 0)
    {
      if (!strcmp (ix86_asm_string, "intel"))
	ix86_asm_dialect = ASM_INTEL;
      else if (!strcmp (ix86_asm_string, "att"))
	ix86_asm_dialect = ASM_ATT;
      else
	error ("bad value (%s) for -masm= switch", ix86_asm_string);
    }
  if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
    error ("code model `%s' not supported in the %s bit mode",
	   ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
  if (ix86_cmodel == CM_LARGE)
    sorry ("code model `large' not supported yet");
  if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
    sorry ("%i-bit mode not compiled in",
	   (target_flags & MASK_64BIT) ? 64 : 32);
  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
      {
	ix86_arch = processor_alias_table[i].processor;
	/* Default cpu tuning to the architecture.  */
	ix86_tune = ix86_arch;
	if (processor_alias_table[i].flags & PTA_MMX
	    && !(target_flags_explicit & MASK_MMX))
	  target_flags |= MASK_MMX;
	if (processor_alias_table[i].flags & PTA_3DNOW
	    && !(target_flags_explicit & MASK_3DNOW))
	  target_flags |= MASK_3DNOW;
	if (processor_alias_table[i].flags & PTA_3DNOW_A
	    && !(target_flags_explicit & MASK_3DNOW_A))
	  target_flags |= MASK_3DNOW_A;
	if (processor_alias_table[i].flags & PTA_SSE
	    && !(target_flags_explicit & MASK_SSE))
	  target_flags |= MASK_SSE;
	if (processor_alias_table[i].flags & PTA_SSE2
	    && !(target_flags_explicit & MASK_SSE2))
	  target_flags |= MASK_SSE2;
	if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
	  x86_prefetch_sse = true;
	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
	  error ("CPU you selected does not support x86-64 instruction set");
	break;
      }

  if (i == pta_size)
    error ("bad value (%s) for -march= switch", ix86_arch_string);
  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
      {
	ix86_tune = processor_alias_table[i].processor;
	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
	  error ("CPU you selected does not support x86-64 instruction set");
	if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
	  x86_prefetch_sse = true;
	break;
      }
  if (i == pta_size)
    error ("bad value (%s) for -mtune= switch", ix86_tune_string);
  if (optimize_size)
    ix86_cost = &size_cost;
  else
    ix86_cost = processor_target_table[ix86_tune].cost;
  target_flags |= processor_target_table[ix86_tune].target_enable;
  target_flags &= ~processor_target_table[ix86_tune].target_disable;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;

  /* Validate -mregparm= value.  */
  if (ix86_regparm_string)
    {
      i = atoi (ix86_regparm_string);
      if (i < 0 || i > REGPARM_MAX)
	error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
      else
	ix86_regparm = i;
    }
  if (TARGET_64BIT)
    ix86_regparm = REGPARM_MAX;
  /* If the user has provided any of the -malign-* options,
     warn and use that value only if -falign-* is not set.
     Remove this code in GCC 3.2 or later.  */
  if (ix86_align_loops_string)
    {
      warning ("-malign-loops is obsolete, use -falign-loops");
      if (align_loops == 0)
	{
	  i = atoi (ix86_align_loops_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_loops = 1 << i;
	}
    }

  if (ix86_align_jumps_string)
    {
      warning ("-malign-jumps is obsolete, use -falign-jumps");
      if (align_jumps == 0)
	{
	  i = atoi (ix86_align_jumps_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_jumps = 1 << i;
	}
    }

  if (ix86_align_funcs_string)
    {
      warning ("-malign-functions is obsolete, use -falign-functions");
      if (align_functions == 0)
	{
	  i = atoi (ix86_align_funcs_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_functions = 1 << i;
	}
    }
  /* Default align_* from the processor table.  */
  if (align_loops == 0)
    {
      align_loops = processor_target_table[ix86_tune].align_loop;
      align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
    }
  if (align_jumps == 0)
    {
      align_jumps = processor_target_table[ix86_tune].align_jump;
      align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
    }
  if (align_functions == 0)
    align_functions = processor_target_table[ix86_tune].align_func;
  /* Validate -mpreferred-stack-boundary= value, or provide default.
     The default of 128 bits is for Pentium III's SSE __m128, but we
     don't want additional code to keep the stack aligned when
     optimizing for code size.  */
  ix86_preferred_stack_boundary = (optimize_size
				   ? TARGET_64BIT ? 128 : 32
				   : 128);
  if (ix86_preferred_stack_boundary_string)
    {
      i = atoi (ix86_preferred_stack_boundary_string);
      if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
	error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
	       TARGET_64BIT ? 4 : 2);
      else
	ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
    }
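
  /* A worked example, assuming BITS_PER_UNIT == 8: with
     -mpreferred-stack-boundary=4 the computation above yields
     (1 << 4) * 8 = 128 bits, i.e. the 16-byte alignment that SSE
     __m128 values want.  */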
  /* Validate -mbranch-cost= value, or provide default.  */
  ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
  if (ix86_branch_cost_string)
    {
      i = atoi (ix86_branch_cost_string);
      if (i < 0 || i > 5)
	error ("-mbranch-cost=%d is not between 0 and 5", i);
      else
	ix86_branch_cost = i;
    }
  if (ix86_tls_dialect_string)
    {
      if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
	ix86_tls_dialect = TLS_DIALECT_GNU;
      else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
	ix86_tls_dialect = TLS_DIALECT_SUN;
      else
	error ("bad value (%s) for -mtls-dialect= switch",
	       ix86_tls_dialect_string);
    }
  /* Keep nonleaf frame pointers.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_unsafe_math_optimizations)
    target_flags &= ~MASK_IEEE_FP;

  /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
     since the insns won't need emulation.  */
  if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
    target_flags &= ~MASK_NO_FANCY_MATH_387;
  /* Turn on SSE2 builtins for -mpni.  */
  if (TARGET_PNI)
    target_flags |= MASK_SSE2;

  /* Turn on SSE builtins for -msse2.  */
  if (TARGET_SSE2)
    target_flags |= MASK_SSE;
  if (TARGET_64BIT)
    {
      if (TARGET_ALIGN_DOUBLE)
	error ("-malign-double makes no sense in the 64bit mode");
      if (TARGET_RTD)
	error ("-mrtd calling convention not supported in the 64bit mode");
      /* Enable by default the SSE and MMX builtins.  */
      target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
      ix86_fpmath = FPMATH_SSE;
    }
  else
    {
      ix86_fpmath = FPMATH_387;
      /* The i386 ABI does not specify a red zone.  It still makes sense
	 to use one when the programmer takes care to keep the stack from
	 being destroyed.  */
      if (!(target_flags_explicit & MASK_NO_RED_ZONE))
	target_flags |= MASK_NO_RED_ZONE;
    }
  if (ix86_fpmath_string != 0)
    {
      if (! strcmp (ix86_fpmath_string, "387"))
	ix86_fpmath = FPMATH_387;
      else if (! strcmp (ix86_fpmath_string, "sse"))
	{
	  if (!TARGET_SSE)
	    {
	      warning ("SSE instruction set disabled, using 387 arithmetics");
	      ix86_fpmath = FPMATH_387;
	    }
	  else
	    ix86_fpmath = FPMATH_SSE;
	}
      else if (! strcmp (ix86_fpmath_string, "387,sse")
	       || ! strcmp (ix86_fpmath_string, "sse,387"))
	{
	  if (!TARGET_SSE)
	    {
	      warning ("SSE instruction set disabled, using 387 arithmetics");
	      ix86_fpmath = FPMATH_387;
	    }
	  else if (!TARGET_80387)
	    {
	      warning ("387 instruction set disabled, using SSE arithmetics");
	      ix86_fpmath = FPMATH_SSE;
	    }
	  else
	    ix86_fpmath = FPMATH_SSE | FPMATH_387;
	}
      else
	error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
    }
  /* It makes no sense to ask for just SSE builtins, so MMX is also turned
     on by -msse.  */
  if (TARGET_SSE)
    {
      target_flags |= MASK_MMX;
      x86_prefetch_sse = true;
    }

  /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow.  */
  if (TARGET_3DNOW)
    {
      target_flags |= MASK_MMX;
      /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
	 extensions it adds.  */
      if (x86_3dnow_a & (1 << ix86_arch))
	target_flags |= MASK_3DNOW_A;
    }
  if ((x86_accumulate_outgoing_args & TUNEMASK)
      && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
      && !optimize_size)
    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;

  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
  {
    char *p;
    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
    p = strchr (internal_label_prefix, 'X');
    internal_label_prefix_len = p - internal_label_prefix;
    *p = '\0';
  }
}
void
optimization_options (int level, int size ATTRIBUTE_UNUSED)
{
  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
     make the problem with not enough registers even worse.  */
#ifdef INSN_SCHEDULING
  if (level > 1)
    flag_schedule_insns = 0;
#endif

  /* The default values of these switches depend on TARGET_64BIT,
     which is not known at this moment.  Mark these values with 2 and
     let the user override them.  In case there is no command line
     option specifying them, we will set the defaults in
     override_options.  */
  if (optimize >= 1)
    flag_omit_frame_pointer = 2;
  flag_pcc_struct_return = 2;
  flag_asynchronous_unwind_tables = 2;
}
/* Table of valid machine attributes.  */
const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall",   0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
  /* Fastcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "fastcall",  0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
  /* Cdecl attribute says the callee is a normal C declaration.  */
  { "cdecl",     0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm",   1, 1, false, true,  true,  ix86_handle_regparm_attribute },
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
#endif
  { "ms_struct",  0, 0, false, false, false, ix86_handle_struct_attribute },
  { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
  { NULL,        0, 0, false, false, false, NULL }
};
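
/* A hedged example of how these attributes appear in user source
   (standard GCC attribute syntax; the declarations are illustrative
   only):

     int f (int a, int b) __attribute__ ((regparm (2)));
     int g (int a) __attribute__ ((stdcall));

   The handler functions below validate such uses.  */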
/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call.  */

static bool
ix86_function_ok_for_sibcall (tree decl, tree exp)
{
  /* If we are generating position-independent code, we cannot sibcall
     optimize any indirect call, or a direct call to a global function,
     as the PLT requires %ebx be live.  */
  if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
    return false;

  /* If we are returning floats on the 80387 register stack, we cannot
     make a sibcall from a function that doesn't return a float to a
     function that does or, conversely, from a function that does return
     a float to a function that doesn't; the necessary stack adjustment
     would not be executed.  */
  if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
      != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
    return false;

  /* If this call is indirect, we'll need to be able to use a call-clobbered
     register for the address of the target function.  Make sure that all
     such registers are not used for passing parameters.  */
  if (!decl && !TARGET_64BIT)
    {
      tree type;

      /* We're looking at the CALL_EXPR, we need the type of the function.  */
      type = TREE_OPERAND (exp, 0);	/* pointer expression */
      type = TREE_TYPE (type);		/* pointer type */
      type = TREE_TYPE (type);		/* function type */

      if (ix86_function_regparm (type, NULL) >= 3)
	{
	  /* ??? Need to count the actual number of registers to be used,
	     not the possible number of registers.  Fix later.  */
	  return false;
	}
    }

  /* Otherwise okay.  That also includes certain types of indirect calls.  */
  return true;
}
1559 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1560 arguments as in struct attribute_spec.handler. */
1562 ix86_handle_cdecl_attribute (tree *node, tree name,
1563 tree args ATTRIBUTE_UNUSED,
1564 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1566 if (TREE_CODE (*node) != FUNCTION_TYPE
1567 && TREE_CODE (*node) != METHOD_TYPE
1568 && TREE_CODE (*node) != FIELD_DECL
1569 && TREE_CODE (*node) != TYPE_DECL)
1571 warning ("`%s' attribute only applies to functions",
1572 IDENTIFIER_POINTER (name));
1573 *no_add_attrs = true;
1577 if (is_attribute_p ("fastcall", name))
1579 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1581 error ("fastcall and stdcall attributes are not compatible");
1583 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1585 error ("fastcall and regparm attributes are not compatible");
1588 else if (is_attribute_p ("stdcall", name))
1590 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1592 error ("fastcall and stdcall attributes are not compatible");
1599 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1600 *no_add_attrs = true;
1606 /* Handle a "regparm" attribute;
1607 arguments as in struct attribute_spec.handler. */
1609 ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1610 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1612 if (TREE_CODE (*node) != FUNCTION_TYPE
1613 && TREE_CODE (*node) != METHOD_TYPE
1614 && TREE_CODE (*node) != FIELD_DECL
1615 && TREE_CODE (*node) != TYPE_DECL)
1617 warning ("`%s' attribute only applies to functions",
1618 IDENTIFIER_POINTER (name));
1619 *no_add_attrs = true;
1625 cst = TREE_VALUE (args);
1626 if (TREE_CODE (cst) != INTEGER_CST)
1628 warning ("`%s' attribute requires an integer constant argument",
1629 IDENTIFIER_POINTER (name));
1630 *no_add_attrs = true;
1632 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1634 warning ("argument to `%s' attribute larger than %d",
1635 IDENTIFIER_POINTER (name), REGPARM_MAX);
1636 *no_add_attrs = true;
1639 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1641 error ("fastcall and regparm attributes are not compatible");
1648 /* Return 0 if the attributes for two types are incompatible, 1 if they
1649 are compatible, and 2 if they are nearly compatible (which causes a
1650 warning to be generated). */
1653 ix86_comp_type_attributes (tree type1, tree type2)
1655 /* Check for mismatch of non-default calling convention. */
1656 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1658 if (TREE_CODE (type1) != FUNCTION_TYPE)
1661 /* Check for mismatched fastcall types */
1662 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1663 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1666 /* Check for mismatched return types (cdecl vs stdcall). */
1667 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1668 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1673 /* Return the regparm value for a function with the indicated TYPE and DECL.
1674 DECL may be NULL when the function is called indirectly
1675 or when considering a libcall. */
1678 ix86_function_regparm (tree type, tree decl)
1681 int regparm = ix86_regparm;
1682 bool user_convention = false;
1686 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1689 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1690 user_convention = true;
1693 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1696 user_convention = true;
1699 /* Use register calling convention for local functions when possible. */
1700 if (!TARGET_64BIT && !user_convention && decl
1701 && flag_unit_at_a_time && !profile_flag)
1703 struct cgraph_local_info *i = cgraph_local_info (decl);
1706 /* We can't use regparm(3) for nested functions as these use the
1707 static chain pointer in the third argument. */
1708 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1718 /* Return true if EAX is live at the start of the function. Used by
1719 ix86_expand_prologue to determine if we need special help before
1720 calling allocate_stack_worker. */
1723 ix86_eax_live_at_start_p (void)
1725 /* Cheat. Don't bother working forward from ix86_function_regparm
1726 to the function type to whether an actual argument is located in
1727 eax. Instead just look at cfg info, which is still close enough
1728 to correct at this point. This gives false positives for broken
1729 functions that might use uninitialized data that happens to be
1730 allocated in eax, but who cares? */
1731 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1734 /* Value is the number of bytes of arguments automatically
1735 popped when returning from a subroutine call.
1736 FUNDECL is the declaration node of the function (as a tree),
1737 FUNTYPE is the data type of the function (as a tree),
1738 or for a library call it is an identifier node for the subroutine name.
1739 SIZE is the number of bytes of arguments passed on the stack.
1741 On the 80386, the RTD insn may be used to pop them if the number
1742 of args is fixed, but if the number is variable then the caller
1743 must pop them all. RTD can't be used for library calls now
1744 because the library is compiled with the Unix compiler.
1745 Use of RTD is a selectable option, since it is incompatible with
1746 standard Unix calling sequences. If the option is not selected,
1747 the caller must always pop the args.
1749 The attribute stdcall is equivalent to RTD on a per-module basis. */
1752 ix86_return_pops_args (tree fundecl, tree funtype, int size)
1754 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1756 /* Cdecl functions override -mrtd, and never pop the stack. */
1757 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1759 /* Stdcall and fastcall functions will pop the stack if not variable args. */
1761 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1762 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1766 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1767 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1768 == void_type_node)))
1772 /* Lose any fake structure return argument if it is passed on the stack. */
1773 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1776 int nregs = ix86_function_regparm (funtype, fundecl);
1779 return GET_MODE_SIZE (Pmode);
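/* Worked example (sketch): for the stdcall declaration below,
   ix86_return_pops_args returns 8 for the two SImode arguments, so the
   callee returns with "ret $8" and the caller does not adjust %esp.  */
#if 0
int __attribute__ ((stdcall)) sum2 (int a, int b);  /* pops 8 bytes */
#endif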
1785 /* Argument support functions. */
1787 /* Return true when register may be used to pass function parameters. */
1789 ix86_function_arg_regno_p (int regno)
1793 return (regno < REGPARM_MAX
1794 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1795 if (SSE_REGNO_P (regno) && TARGET_SSE)
1797 /* RAX is used as a hidden argument to va_arg functions. */
1800 for (i = 0; i < REGPARM_MAX; i++)
1801 if (regno == x86_64_int_parameter_registers[i])
1806 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1807 for a call to a function whose data type is FNTYPE.
1808 For a library call, FNTYPE is 0. */
1811 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1812 tree fntype, /* tree ptr for function decl */
1813 rtx libname, /* SYMBOL_REF of library name or 0 */
1816 static CUMULATIVE_ARGS zero_cum;
1817 tree param, next_param;
1819 if (TARGET_DEBUG_ARG)
1821 fprintf (stderr, "\ninit_cumulative_args (");
1823 fprintf (stderr, "fntype code = %s, ret code = %s",
1824 tree_code_name[(int) TREE_CODE (fntype)],
1825 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1827 fprintf (stderr, "no fntype");
1830 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1835 /* Set up the number of registers to use for passing arguments. */
1837 cum->nregs = ix86_function_regparm (fntype, fndecl);
1839 cum->nregs = ix86_regparm;
1840 cum->sse_nregs = SSE_REGPARM_MAX;
1841 cum->mmx_nregs = MMX_REGPARM_MAX;
1842 cum->warn_sse = true;
1843 cum->warn_mmx = true;
1844 cum->maybe_vaarg = false;
1846 /* Use the ecx and edx registers if the function has the fastcall attribute. */
1847 if (fntype && !TARGET_64BIT)
1849 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1857 /* Determine if this function has variable arguments. This is
1858 indicated by the last argument being 'void_type_node' if there
1859 are no variable arguments. If there are variable arguments,
1860 then we won't pass anything in registers. */
1862 if (cum->nregs || !TARGET_MMX || !TARGET_SSE)
1864 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1865 param != 0; param = next_param)
1867 next_param = TREE_CHAIN (param);
1868 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1879 cum->maybe_vaarg = true;
1883 if ((!fntype && !libname)
1884 || (fntype && !TYPE_ARG_TYPES (fntype)))
1885 cum->maybe_vaarg = 1;
1887 if (TARGET_DEBUG_ARG)
1888 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
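/* Illustrative sketch (hypothetical user code): with the fastcall
   attribute recognized above, the first two integer-sized arguments
   are assigned to ECX and EDX and any remainder stays on the stack.  */
#if 0
int __attribute__ ((fastcall)) f (int a, int b, int c);
/* a -> %ecx, b -> %edx, c -> pushed on the stack.  */
#endif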
1893 /* x86-64 register passing implementation. See the x86-64 ABI for details.
1894 The goal of this code is to classify each 8-byte chunk of an incoming
1895 argument by register class and assign registers accordingly. */
1897 /* Return the union class of CLASS1 and CLASS2.
1898 See the x86-64 PS ABI for details. */
1900 static enum x86_64_reg_class
1901 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
1903 /* Rule #1: If both classes are equal, this is the resulting class. */
1904 if (class1 == class2)
1907 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is the other class. */
1909 if (class1 == X86_64_NO_CLASS)
1911 if (class2 == X86_64_NO_CLASS)
1914 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1915 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1916 return X86_64_MEMORY_CLASS;
1918 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1919 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1920 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1921 return X86_64_INTEGERSI_CLASS;
1922 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1923 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1924 return X86_64_INTEGER_CLASS;
1926 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1927 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1928 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1929 return X86_64_MEMORY_CLASS;
1931 /* Rule #6: Otherwise class SSE is used. */
1932 return X86_64_SSE_CLASS;
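/* Worked example of the merge rules (per the x86-64 psABI): in the
   struct below the first eightbyte holds an int plus padding, so
   INTEGERSI merges with NO_CLASS (rule #2); the second eightbyte is
   SSEDF.  The struct is therefore passed in one general register and
   one SSE register.  */
#if 0
struct id { int a; double b; };   /* 16 bytes: eightbyte 0 integer, 1 SSE    */
extern void use_id (struct id);   /* eightbyte 0 in %rdi, eightbyte 1 in %xmm0 */
#endif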
1935 /* Classify the argument of type TYPE and mode MODE.
1936 CLASSES will be filled by the register class used to pass each word
1937 of the operand. The number of words is returned. In case the parameter
1938 should be passed in memory, 0 is returned. As a special case for zero
1939 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1941 BIT_OFFSET is used internally for handling records; it specifies the
1942 offset in bits modulo 256 to avoid overflow cases.
1944 See the x86-64 PS ABI for details.
1948 classify_argument (enum machine_mode mode, tree type,
1949 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
1951 HOST_WIDE_INT bytes =
1952 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1953 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1955 /* Variable sized entities are always passed/returned in memory. */
1959 if (mode != VOIDmode
1960 && MUST_PASS_IN_STACK (mode, type))
1963 if (type && AGGREGATE_TYPE_P (type))
1967 enum x86_64_reg_class subclasses[MAX_CLASSES];
1969 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1973 for (i = 0; i < words; i++)
1974 classes[i] = X86_64_NO_CLASS;
1976 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1977 signal the memory class, so handle it as a special case. */
1980 classes[0] = X86_64_NO_CLASS;
1984 /* Classify each field of the record and merge the classes. */
1985 if (TREE_CODE (type) == RECORD_TYPE)
1987 /* For C++ classes, first merge in the fields of the base classes. */
1988 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1990 tree bases = TYPE_BINFO_BASETYPES (type);
1991 int n_bases = TREE_VEC_LENGTH (bases);
1994 for (i = 0; i < n_bases; ++i)
1996 tree binfo = TREE_VEC_ELT (bases, i);
1998 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1999 tree type = BINFO_TYPE (binfo);
2001 num = classify_argument (TYPE_MODE (type),
2003 (offset + bit_offset) % 256);
2006 for (i = 0; i < num; i++)
2008 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2010 merge_classes (subclasses[i], classes[i + pos]);
2014 /* And now merge the fields of the structure. */
2015 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2017 if (TREE_CODE (field) == FIELD_DECL)
2021 /* Bitfields are always classified as integer. Handle them
2022 early, since later code would consider them to be
2023 misaligned integers. */
2024 if (DECL_BIT_FIELD (field))
2026 for (i = int_bit_position (field) / 8 / 8;
2027 i < (int_bit_position (field)
2028 + tree_low_cst (DECL_SIZE (field), 0)
2031 merge_classes (X86_64_INTEGER_CLASS,
2036 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2037 TREE_TYPE (field), subclasses,
2038 (int_bit_position (field)
2039 + bit_offset) % 256);
2042 for (i = 0; i < num; i++)
2045 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2047 merge_classes (subclasses[i], classes[i + pos]);
2053 /* Arrays are handled as small records. */
2054 else if (TREE_CODE (type) == ARRAY_TYPE)
2057 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2058 TREE_TYPE (type), subclasses, bit_offset);
2062 /* The partial classes are now full classes. */
2063 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2064 subclasses[0] = X86_64_SSE_CLASS;
2065 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2066 subclasses[0] = X86_64_INTEGER_CLASS;
2068 for (i = 0; i < words; i++)
2069 classes[i] = subclasses[i % num];
2071 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
2072 else if (TREE_CODE (type) == UNION_TYPE
2073 || TREE_CODE (type) == QUAL_UNION_TYPE)
2075 /* For C++ classes, first merge in the fields of the base classes. */
2076 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2078 tree bases = TYPE_BINFO_BASETYPES (type);
2079 int n_bases = TREE_VEC_LENGTH (bases);
2082 for (i = 0; i < n_bases; ++i)
2084 tree binfo = TREE_VEC_ELT (bases, i);
2086 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2087 tree type = BINFO_TYPE (binfo);
2089 num = classify_argument (TYPE_MODE (type),
2091 (offset + (bit_offset % 64)) % 256);
2094 for (i = 0; i < num; i++)
2096 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2098 merge_classes (subclasses[i], classes[i + pos]);
2102 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2104 if (TREE_CODE (field) == FIELD_DECL)
2107 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2108 TREE_TYPE (field), subclasses,
2112 for (i = 0; i < num; i++)
2113 classes[i] = merge_classes (subclasses[i], classes[i]);
2117 else if (TREE_CODE (type) == SET_TYPE)
2121 classes[0] = X86_64_INTEGERSI_CLASS;
2124 else if (bytes <= 8)
2126 classes[0] = X86_64_INTEGER_CLASS;
2129 else if (bytes <= 12)
2131 classes[0] = X86_64_INTEGER_CLASS;
2132 classes[1] = X86_64_INTEGERSI_CLASS;
2137 classes[0] = X86_64_INTEGER_CLASS;
2138 classes[1] = X86_64_INTEGER_CLASS;
2145 /* Final merger cleanup. */
2146 for (i = 0; i < words; i++)
2148 /* If one class is MEMORY, everything should be passed in memory. */
2150 if (classes[i] == X86_64_MEMORY_CLASS)
2153 /* The X86_64_SSEUP_CLASS should always be preceded by
2154 X86_64_SSE_CLASS. */
2155 if (classes[i] == X86_64_SSEUP_CLASS
2156 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2157 classes[i] = X86_64_SSE_CLASS;
2159 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2160 if (classes[i] == X86_64_X87UP_CLASS
2161 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2162 classes[i] = X86_64_SSE_CLASS;
2167 /* Compute the alignment needed. We align all types to their natural
2168 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
2169 if (mode != VOIDmode && mode != BLKmode)
2171 int mode_alignment = GET_MODE_BITSIZE (mode);
2174 mode_alignment = 128;
2175 else if (mode == XCmode)
2176 mode_alignment = 256;
2177 /* Misaligned fields are always returned in memory. */
2178 if (bit_offset % mode_alignment)
2182 /* Classification of atomic types. */
2192 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2193 classes[0] = X86_64_INTEGERSI_CLASS;
2195 classes[0] = X86_64_INTEGER_CLASS;
2199 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2202 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2203 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2206 if (!(bit_offset % 64))
2207 classes[0] = X86_64_SSESF_CLASS;
2209 classes[0] = X86_64_SSE_CLASS;
2212 classes[0] = X86_64_SSEDF_CLASS;
2215 classes[0] = X86_64_X87_CLASS;
2216 classes[1] = X86_64_X87UP_CLASS;
2222 classes[0] = X86_64_X87_CLASS;
2223 classes[1] = X86_64_X87UP_CLASS;
2224 classes[2] = X86_64_X87_CLASS;
2225 classes[3] = X86_64_X87UP_CLASS;
2228 classes[0] = X86_64_SSEDF_CLASS;
2229 classes[1] = X86_64_SSEDF_CLASS;
2232 classes[0] = X86_64_SSE_CLASS;
2240 classes[0] = X86_64_SSE_CLASS;
2241 classes[1] = X86_64_SSEUP_CLASS;
2256 /* Examine the argument and set the number of registers required in each
2257 class. Return 0 iff the parameter should be passed in memory. */
2259 examine_argument (enum machine_mode mode, tree type, int in_return,
2260 int *int_nregs, int *sse_nregs)
2262 enum x86_64_reg_class class[MAX_CLASSES];
2263 int n = classify_argument (mode, type, class, 0);
2269 for (n--; n >= 0; n--)
2272 case X86_64_INTEGER_CLASS:
2273 case X86_64_INTEGERSI_CLASS:
2276 case X86_64_SSE_CLASS:
2277 case X86_64_SSESF_CLASS:
2278 case X86_64_SSEDF_CLASS:
2281 case X86_64_NO_CLASS:
2282 case X86_64_SSEUP_CLASS:
2284 case X86_64_X87_CLASS:
2285 case X86_64_X87UP_CLASS:
2289 case X86_64_MEMORY_CLASS:
2294 /* Construct the container for the argument used by the GCC interface. See
2295 FUNCTION_ARG for the detailed description. */
2297 construct_container (enum machine_mode mode, tree type, int in_return,
2298 int nintregs, int nsseregs, const int * intreg,
2301 enum machine_mode tmpmode;
2303 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2304 enum x86_64_reg_class class[MAX_CLASSES];
2308 int needed_sseregs, needed_intregs;
2309 rtx exp[MAX_CLASSES];
2312 n = classify_argument (mode, type, class, 0);
2313 if (TARGET_DEBUG_ARG)
2316 fprintf (stderr, "Memory class\n");
2319 fprintf (stderr, "Classes:");
2320 for (i = 0; i < n; i++)
2322 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2324 fprintf (stderr, "\n");
2329 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2331 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2334 /* First construct the simple cases. Avoid SCmode, since we want to use
2335 a single register to pass this type. */
2336 if (n == 1 && mode != SCmode)
2339 case X86_64_INTEGER_CLASS:
2340 case X86_64_INTEGERSI_CLASS:
2341 return gen_rtx_REG (mode, intreg[0]);
2342 case X86_64_SSE_CLASS:
2343 case X86_64_SSESF_CLASS:
2344 case X86_64_SSEDF_CLASS:
2345 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2346 case X86_64_X87_CLASS:
2347 return gen_rtx_REG (mode, FIRST_STACK_REG);
2348 case X86_64_NO_CLASS:
2349 /* Zero sized array, struct or class. */
2354 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
2355 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2357 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2358 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2359 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2360 && class[1] == X86_64_INTEGER_CLASS
2361 && (mode == CDImode || mode == TImode || mode == TFmode)
2362 && intreg[0] + 1 == intreg[1])
2363 return gen_rtx_REG (mode, intreg[0]);
2365 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2366 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2367 return gen_rtx_REG (XCmode, FIRST_STACK_REG);
2369 /* Otherwise figure out the entries of the PARALLEL. */
2370 for (i = 0; i < n; i++)
2374 case X86_64_NO_CLASS:
2376 case X86_64_INTEGER_CLASS:
2377 case X86_64_INTEGERSI_CLASS:
2378 /* Merge TImodes on aligned occasions here too. */
2379 if (i * 8 + 8 > bytes)
2380 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2381 else if (class[i] == X86_64_INTEGERSI_CLASS)
2385 /* We've requested 24 bytes for which we don't have a mode. Use DImode. */
2386 if (tmpmode == BLKmode)
2388 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2389 gen_rtx_REG (tmpmode, *intreg),
2393 case X86_64_SSESF_CLASS:
2394 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2395 gen_rtx_REG (SFmode,
2396 SSE_REGNO (sse_regno)),
2400 case X86_64_SSEDF_CLASS:
2401 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2402 gen_rtx_REG (DFmode,
2403 SSE_REGNO (sse_regno)),
2407 case X86_64_SSE_CLASS:
2408 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2412 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2413 gen_rtx_REG (tmpmode,
2414 SSE_REGNO (sse_regno)),
2416 if (tmpmode == TImode)
2424 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2425 for (i = 0; i < nexps; i++)
2426 XVECEXP (ret, 0, i) = exp [i];
2430 /* Update the data in CUM to advance over an argument
2431 of mode MODE and data type TYPE.
2432 (TYPE is null for libcalls where that information may not be available.) */
2435 function_arg_advance (CUMULATIVE_ARGS *cum, /* current arg information */
2436 enum machine_mode mode, /* current arg mode */
2437 tree type, /* type of the argument or 0 if lib support */
2438 int named) /* whether or not the argument was named */
2441 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2442 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2444 if (TARGET_DEBUG_ARG)
2446 "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, mode=%s, named=%d)\n\n",
2447 words, cum->words, cum->nregs, cum->sse_nregs, GET_MODE_NAME (mode), named);
2450 int int_nregs, sse_nregs;
2451 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2452 cum->words += words;
2453 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2455 cum->nregs -= int_nregs;
2456 cum->sse_nregs -= sse_nregs;
2457 cum->regno += int_nregs;
2458 cum->sse_regno += sse_nregs;
2461 cum->words += words;
2465 if (TARGET_SSE && SSE_REG_MODE_P (mode)
2466 && (!type || !AGGREGATE_TYPE_P (type)))
2468 cum->sse_words += words;
2469 cum->sse_nregs -= 1;
2470 cum->sse_regno += 1;
2471 if (cum->sse_nregs <= 0)
2477 else if (TARGET_MMX && MMX_REG_MODE_P (mode)
2478 && (!type || !AGGREGATE_TYPE_P (type)))
2480 cum->mmx_words += words;
2481 cum->mmx_nregs -= 1;
2482 cum->mmx_regno += 1;
2483 if (cum->mmx_nregs <= 0)
2491 cum->words += words;
2492 cum->nregs -= words;
2493 cum->regno += words;
2495 if (cum->nregs <= 0)
2505 /* Define where to put the arguments to a function.
2506 Value is zero to push the argument on the stack,
2507 or a hard register in which to store the argument.
2509 MODE is the argument's machine mode.
2510 TYPE is the data type of the argument (as a tree).
2511 This is null for libcalls where that information may not be available.
2513 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2514 the preceding args and about the function being called.
2515 NAMED is nonzero if this argument is a named parameter
2516 (otherwise it is an extra parameter matching an ellipsis). */
2519 function_arg (CUMULATIVE_ARGS *cum, /* current arg information */
2520 enum machine_mode mode, /* current arg mode */
2521 tree type, /* type of the argument or 0 if lib support */
2522 int named) /* != 0 for normal args, == 0 for ... args */
2526 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2527 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2528 static bool warnedsse, warnedmmx;
2530 /* Handle a hidden AL argument containing the number of registers for varargs
2531 x86-64 functions. For the i386 ABI just return constm1_rtx to avoid
2533 if (mode == VOIDmode)
2536 return GEN_INT (cum->maybe_vaarg
2537 ? (cum->sse_nregs < 0
2545 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2546 &x86_64_int_parameter_registers [cum->regno],
2551 /* For now, pass fp/complex values on the stack. */
2563 if (words <= cum->nregs)
2565 int regno = cum->regno;
2567 /* Fastcall allocates the first two DWORD (SImode) or
2568 smaller arguments to ECX and EDX. */
2571 if (mode == BLKmode || mode == DImode)
2574 /* ECX, not EAX, is the first allocated register. */
2578 ret = gen_rtx_REG (mode, regno);
2588 if (!type || !AGGREGATE_TYPE_P (type))
2590 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
2593 warning ("SSE vector argument without SSE enabled "
2597 ret = gen_rtx_REG (mode, cum->sse_regno + FIRST_SSE_REG);
2604 if (!type || !AGGREGATE_TYPE_P (type))
2606 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
2609 warning ("MMX vector argument without MMX enabled "
2613 ret = gen_rtx_REG (mode, cum->mmx_regno + FIRST_MMX_REG);
2618 if (TARGET_DEBUG_ARG)
2621 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2622 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2625 print_simple_rtl (stderr, ret);
2627 fprintf (stderr, ", stack");
2629 fprintf (stderr, " )\n");
2635 /* A C expression that indicates when an argument must be passed by
2636 reference. If nonzero for an argument, a copy of that argument is
2637 made in memory and a pointer to the argument is passed instead of
2638 the argument itself. The pointer is passed in whatever way is
2639 appropriate for passing a pointer to that type. */
2642 function_arg_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2643 enum machine_mode mode ATTRIBUTE_UNUSED,
2644 tree type, int named ATTRIBUTE_UNUSED)
2649 if (type && int_size_in_bytes (type) == -1)
2651 if (TARGET_DEBUG_ARG)
2652 fprintf (stderr, "function_arg_pass_by_reference\n");
2659 /* Return true when TYPE should be 128-bit aligned for 32-bit argument passing
2662 contains_128bit_aligned_vector_p (tree type)
2664 enum machine_mode mode = TYPE_MODE (type);
2665 if (SSE_REG_MODE_P (mode)
2666 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2668 if (TYPE_ALIGN (type) < 128)
2671 if (AGGREGATE_TYPE_P (type))
2673 /* Walk the aggregates recursively. */
2674 if (TREE_CODE (type) == RECORD_TYPE
2675 || TREE_CODE (type) == UNION_TYPE
2676 || TREE_CODE (type) == QUAL_UNION_TYPE)
2680 if (TYPE_BINFO (type) != NULL
2681 && TYPE_BINFO_BASETYPES (type) != NULL)
2683 tree bases = TYPE_BINFO_BASETYPES (type);
2684 int n_bases = TREE_VEC_LENGTH (bases);
2687 for (i = 0; i < n_bases; ++i)
2689 tree binfo = TREE_VEC_ELT (bases, i);
2690 tree type = BINFO_TYPE (binfo);
2692 if (contains_128bit_aligned_vector_p (type))
2696 /* And now check the fields of the structure. */
2697 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2699 if (TREE_CODE (field) == FIELD_DECL
2700 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2704 /* Just in case some language passes arrays by value. */
2705 else if (TREE_CODE (type) == ARRAY_TYPE)
2707 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2716 /* Gives the alignment boundary, in bits, of an argument with the
2717 specified mode and type. */
2720 ix86_function_arg_boundary (enum machine_mode mode, tree type)
2724 align = TYPE_ALIGN (type);
2726 align = GET_MODE_ALIGNMENT (mode);
2727 if (align < PARM_BOUNDARY)
2728 align = PARM_BOUNDARY;
2731 /* The i386 ABI defines all arguments to be 4 byte aligned. We have to
2732 make an exception for SSE modes since these require 128bit alignment.
2735 The handling here differs from field_alignment. ICC aligns MMX
2736 arguments to 4 byte boundaries, while structure fields are aligned
2737 to 8 byte boundaries. */
2740 if (!SSE_REG_MODE_P (mode))
2741 align = PARM_BOUNDARY;
2745 if (!contains_128bit_aligned_vector_p (type))
2746 align = PARM_BOUNDARY;
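/* Illustrative sketch: a plain int keeps the 32-bit PARM_BOUNDARY,
   while the 16-byte vector argument below is forced to a 128-bit
   boundary by the SSE exception above.  */
#if 0
typedef float v4sf __attribute__ ((vector_size (16)));
extern void g (int a, v4sf b);   /* a: 4-byte slot, b: 16-byte aligned */
#endif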
2754 /* Return true if N is a possible register number of function value. */
2756 ix86_function_value_regno_p (int regno)
2760 return ((regno) == 0
2761 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2762 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2764 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2765 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2766 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2769 /* Define how to find the value returned by a function.
2770 VALTYPE is the data type of the value (as a tree).
2771 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2772 otherwise, FUNC is 0. */
2774 ix86_function_value (tree valtype)
2778 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2779 REGPARM_MAX, SSE_REGPARM_MAX,
2780 x86_64_int_return_registers, 0);
2781 /* For zero sized structures, construct_container returns NULL, but we need
2782 to keep the rest of the compiler happy by returning a meaningful value. */
2784 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2788 return gen_rtx_REG (TYPE_MODE (valtype),
2789 ix86_value_regno (TYPE_MODE (valtype)));
2792 /* Return nonzero iff TYPE is returned in memory. */
2794 ix86_return_in_memory (tree type)
2796 int needed_intregs, needed_sseregs, size;
2797 enum machine_mode mode = TYPE_MODE (type);
2800 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
2802 if (mode == BLKmode)
2805 size = int_size_in_bytes (type);
2807 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
2810 if (VECTOR_MODE_P (mode) || mode == TImode)
2812 /* User-created vectors small enough to fit in EAX. */
2816 /* MMX/3dNow values are returned on the stack, since we've
2817 got to EMMS/FEMMS before returning. */
2821 /* SSE values are returned in XMM0. */
2822 /* ??? Except when it doesn't exist? We have a choice of
2823 either (1) being ABI incompatible with a -march switch,
2824 or (2) generating an error here. Given no good solution,
2825 I think the safest thing is one warning. The user won't
2826 be able to use -Werror, but.... */
2837 warning ("SSE vector return without SSE enabled "
2852 /* Define how to find the value returned by a library function
2853 assuming the value has mode MODE. */
2855 ix86_libcall_value (enum machine_mode mode)
2865 return gen_rtx_REG (mode, FIRST_SSE_REG);
2868 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2873 return gen_rtx_REG (mode, 0);
2877 return gen_rtx_REG (mode, ix86_value_regno (mode));
2880 /* Given a mode, return the register to use for a return value. */
2883 ix86_value_regno (enum machine_mode mode)
2885 /* Floating point return values in %st(0). */
2886 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2887 return FIRST_FLOAT_REG;
2888 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
2889 we prevent this case when SSE is not available. */
2890 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
2891 return FIRST_SSE_REG;
2892 /* Everything else in %eax. */
2896 /* Create the va_list data type. */
2899 ix86_build_builtin_va_list (void)
2901 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2903 /* For i386 we use a plain pointer to the argument area. */
2905 return build_pointer_type (char_type_node);
2907 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2908 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2910 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2911 unsigned_type_node);
2912 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2913 unsigned_type_node);
2914 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2916 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2919 DECL_FIELD_CONTEXT (f_gpr) = record;
2920 DECL_FIELD_CONTEXT (f_fpr) = record;
2921 DECL_FIELD_CONTEXT (f_ovf) = record;
2922 DECL_FIELD_CONTEXT (f_sav) = record;
2924 TREE_CHAIN (record) = type_decl;
2925 TYPE_NAME (record) = type_decl;
2926 TYPE_FIELDS (record) = f_gpr;
2927 TREE_CHAIN (f_gpr) = f_fpr;
2928 TREE_CHAIN (f_fpr) = f_ovf;
2929 TREE_CHAIN (f_ovf) = f_sav;
2931 layout_type (record);
2933 /* The correct type is an array type of one element. */
2934 return build_array_type (record, build_index_type (size_zero_node));
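/* The record built above corresponds to this C-level sketch of the
   x86-64 va_list (field names match the FIELD_DECLs above; the pointer
   types are an approximation):  */
#if 0
typedef struct __va_list_tag
{
  unsigned int gp_offset;      /* byte offset of next GP reg in save area  */
  unsigned int fp_offset;      /* byte offset of next SSE reg in save area */
  void *overflow_arg_area;     /* next stack-passed argument               */
  void *reg_save_area;         /* block saved by the prologue              */
} __va_list_tag, va_list[1];
#endif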
2937 /* Perform any actions needed for a function that is receiving a
2938 variable number of arguments.
2942 MODE and TYPE are the mode and type of the current parameter.
2944 PRETEND_SIZE is a variable that should be set to the amount of stack
2945 that must be pushed by the prolog to pretend that our caller pushed
2948 Normally, this macro will push all remaining incoming registers on the
2949 stack and set PRETEND_SIZE to the length of the registers pushed. */
2952 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
2953 tree type, int *pretend_size ATTRIBUTE_UNUSED,
2956 CUMULATIVE_ARGS next_cum;
2957 rtx save_area = NULL_RTX, mem;
2970 /* Indicate that we need to allocate space on the stack for the varargs save area. */
2971 ix86_save_varrargs_registers = 1;
2973 cfun->stack_alignment_needed = 128;
2975 fntype = TREE_TYPE (current_function_decl);
2976 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2977 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2978 != void_type_node));
2980 /* For varargs, we do not want to skip the dummy va_dcl argument.
2981 For stdargs, we do want to skip the last named argument. */
2984 function_arg_advance (&next_cum, mode, type, 1);
2987 save_area = frame_pointer_rtx;
2989 set = get_varargs_alias_set ();
2991 for (i = next_cum.regno; i < ix86_regparm; i++)
2993 mem = gen_rtx_MEM (Pmode,
2994 plus_constant (save_area, i * UNITS_PER_WORD));
2995 set_mem_alias_set (mem, set);
2996 emit_move_insn (mem, gen_rtx_REG (Pmode,
2997 x86_64_int_parameter_registers[i]));
3000 if (next_cum.sse_nregs)
3002 /* Now emit code to save the SSE registers. The AX parameter contains the
3003 number of SSE parameter registers used to call this function. We use the
3004 sse_prologue_save insn template, which produces a computed jump across
3005 the SSE saves. We need some preparation work to get this working. */
3007 label = gen_label_rtx ();
3008 label_ref = gen_rtx_LABEL_REF (Pmode, label);
3010 /* Compute the address to jump to:
3011 label - 5*eax + nnamed_sse_arguments*5 */
3012 tmp_reg = gen_reg_rtx (Pmode);
3013 nsse_reg = gen_reg_rtx (Pmode);
3014 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3015 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3016 gen_rtx_MULT (Pmode, nsse_reg,
3018 if (next_cum.sse_regno)
3021 gen_rtx_CONST (DImode,
3022 gen_rtx_PLUS (DImode,
3024 GEN_INT (next_cum.sse_regno * 4))));
3026 emit_move_insn (nsse_reg, label_ref);
3027 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3029 /* Compute the address of the memory block we save into. We always use a
3030 pointer pointing 127 bytes after the first byte to store - this is
3031 needed to keep the instruction size limited to 4 bytes. */
3032 tmp_reg = gen_reg_rtx (Pmode);
3033 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3034 plus_constant (save_area,
3035 8 * REGPARM_MAX + 127)));
3036 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
3037 set_mem_alias_set (mem, set);
3038 set_mem_align (mem, BITS_PER_WORD);
3040 /* And finally do the dirty job! */
3041 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3042 GEN_INT (next_cum.sse_regno), label));
3047 /* Implement va_start. */
3050 ix86_va_start (tree valist, rtx nextarg)
3052 HOST_WIDE_INT words, n_gpr, n_fpr;
3053 tree f_gpr, f_fpr, f_ovf, f_sav;
3054 tree gpr, fpr, ovf, sav, t;
3056 /* Only the 64-bit target needs something special. */
3059 std_expand_builtin_va_start (valist, nextarg);
3063 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3064 f_fpr = TREE_CHAIN (f_gpr);
3065 f_ovf = TREE_CHAIN (f_fpr);
3066 f_sav = TREE_CHAIN (f_ovf);
3068 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3069 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3070 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3071 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3072 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3074 /* Count number of gp and fp argument registers used. */
3075 words = current_function_args_info.words;
3076 n_gpr = current_function_args_info.regno;
3077 n_fpr = current_function_args_info.sse_regno;
3079 if (TARGET_DEBUG_ARG)
3080 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3081 (int) words, (int) n_gpr, (int) n_fpr);
3083 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3084 build_int_2 (n_gpr * 8, 0));
3085 TREE_SIDE_EFFECTS (t) = 1;
3086 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3088 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3089 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
3090 TREE_SIDE_EFFECTS (t) = 1;
3091 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3093 /* Find the overflow area. */
3094 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3096 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3097 build_int_2 (words * UNITS_PER_WORD, 0));
3098 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3099 TREE_SIDE_EFFECTS (t) = 1;
3100 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3102 /* Find the register save area.
3103 The function prologue saves it right above the stack frame. */
3104 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3105 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3106 TREE_SIDE_EFFECTS (t) = 1;
3107 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
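/* Worked example of the values stored above (assuming REGPARM_MAX == 6,
   so 8 * REGPARM_MAX == 48): for "int f (int a, ...)" one GP register
   is named, hence va_start sets gp_offset = 8 and fp_offset = 48, with
   no SSE registers consumed by named arguments.  */
#if 0
int
f (int a, ...)
{
  __builtin_va_list ap;
  int b;
  __builtin_va_start (ap, a);        /* gp_offset = 8, fp_offset = 48 */
  b = __builtin_va_arg (ap, int);    /* loaded from reg_save_area + 8 */
  __builtin_va_end (ap);
  return a + b;
}
#endif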
3110 /* Implement va_arg. */
3112 ix86_va_arg (tree valist, tree type)
3114 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3115 tree f_gpr, f_fpr, f_ovf, f_sav;
3116 tree gpr, fpr, ovf, sav, t;
3118 rtx lab_false, lab_over = NULL_RTX;
3123 /* Only the 64-bit target needs something special. */
3126 return std_expand_builtin_va_arg (valist, type);
3129 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3130 f_fpr = TREE_CHAIN (f_gpr);
3131 f_ovf = TREE_CHAIN (f_fpr);
3132 f_sav = TREE_CHAIN (f_ovf);
3134 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3135 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3136 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3137 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3138 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3140 size = int_size_in_bytes (type);
3143 /* Passed by reference. */
3145 type = build_pointer_type (type);
3146 size = int_size_in_bytes (type);
3148 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3150 container = construct_container (TYPE_MODE (type), type, 0,
3151 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3153 * Pull the value out of the saved registers ...
3156 addr_rtx = gen_reg_rtx (Pmode);
3160 rtx int_addr_rtx, sse_addr_rtx;
3161 int needed_intregs, needed_sseregs;
3164 lab_over = gen_label_rtx ();
3165 lab_false = gen_label_rtx ();
3167 examine_argument (TYPE_MODE (type), type, 0,
3168 &needed_intregs, &needed_sseregs);
3171 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
3172 || TYPE_ALIGN (type) > 128);
3174 /* If we are passing a structure, verify that it is a consecutive block
3175 in the register save area. If not, we need to do moves. */
3176 if (!need_temp && !REG_P (container))
3178 /* Verify that all registers are strictly consecutive */
3179 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3183 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3185 rtx slot = XVECEXP (container, 0, i);
3186 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3187 || INTVAL (XEXP (slot, 1)) != i * 16)
3195 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3197 rtx slot = XVECEXP (container, 0, i);
3198 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3199 || INTVAL (XEXP (slot, 1)) != i * 8)
3206 int_addr_rtx = addr_rtx;
3207 sse_addr_rtx = addr_rtx;
3211 int_addr_rtx = gen_reg_rtx (Pmode);
3212 sse_addr_rtx = gen_reg_rtx (Pmode);
3214 /* First ensure that we fit completely in registers. */
3217 emit_cmp_and_jump_insns (expand_expr
3218 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
3219 GEN_INT ((REGPARM_MAX - needed_intregs +
3220 1) * 8), GE, const1_rtx, SImode,
3225 emit_cmp_and_jump_insns (expand_expr
3226 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
3227 GEN_INT ((SSE_REGPARM_MAX -
3228 needed_sseregs + 1) * 16 +
3229 REGPARM_MAX * 8), GE, const1_rtx,
3230 SImode, 1, lab_false);
3233 /* Compute index to start of area used for integer regs. */
3236 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
3237 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
3238 if (r != int_addr_rtx)
3239 emit_move_insn (int_addr_rtx, r);
3243 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
3244 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
3245 if (r != sse_addr_rtx)
3246 emit_move_insn (sse_addr_rtx, r);
3254 /* Never use the memory itself, as it has the alias set. */
3255 x = XEXP (assign_temp (type, 0, 1, 0), 0);
3256 mem = gen_rtx_MEM (BLKmode, x);
3257 force_operand (x, addr_rtx);
3258 set_mem_alias_set (mem, get_varargs_alias_set ());
3259 set_mem_align (mem, BITS_PER_UNIT);
3261 for (i = 0; i < XVECLEN (container, 0); i++)
3263 rtx slot = XVECEXP (container, 0, i);
3264 rtx reg = XEXP (slot, 0);
3265 enum machine_mode mode = GET_MODE (reg);
3271 if (SSE_REGNO_P (REGNO (reg)))
3273 src_addr = sse_addr_rtx;
3274 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3278 src_addr = int_addr_rtx;
3279 src_offset = REGNO (reg) * 8;
3281 src_mem = gen_rtx_MEM (mode, src_addr);
3282 set_mem_alias_set (src_mem, get_varargs_alias_set ());
3283 src_mem = adjust_address (src_mem, mode, src_offset);
3284 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
3285 emit_move_insn (dest_mem, src_mem);
3292 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3293 build_int_2 (needed_intregs * 8, 0));
3294 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3295 TREE_SIDE_EFFECTS (t) = 1;
3296 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3301 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3302 build_int_2 (needed_sseregs * 16, 0));
3303 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3304 TREE_SIDE_EFFECTS (t) = 1;
3305 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3308 emit_jump_insn (gen_jump (lab_over));
3310 emit_label (lab_false);
3313 /* ... otherwise out of the overflow area. */
3315 /* Care for on-stack alignment if needed. */
3316 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3320 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3321 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3322 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3326 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3328 emit_move_insn (addr_rtx, r);
3331 build (PLUS_EXPR, TREE_TYPE (t), t,
3332 build_int_2 (rsize * UNITS_PER_WORD, 0));
3333 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3334 TREE_SIDE_EFFECTS (t) = 1;
3335 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3338 emit_label (lab_over);
3342 r = gen_rtx_MEM (Pmode, addr_rtx);
3343 set_mem_alias_set (r, get_varargs_alias_set ());
3344 emit_move_insn (addr_rtx, r);
3350 /* Return nonzero if OP is either an i387 or an SSE fp register. */
3352 any_fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3354 return ANY_FP_REG_P (op);
3357 /* Return nonzero if OP is an i387 fp register. */
3359 fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3361 return FP_REG_P (op);
3364 /* Return nonzero if OP is a non-fp register_operand. */
3366 register_and_not_any_fp_reg_operand (rtx op, enum machine_mode mode)
3368 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3371 /* Return nonzero if OP is a register operand other than an
3372 i387 fp register. */
3374 register_and_not_fp_reg_operand (rtx op, enum machine_mode mode)
3376 return register_operand (op, mode) && !FP_REG_P (op);
3379 /* Return nonzero if OP is a general operand representable on x86_64. */
3382 x86_64_general_operand (rtx op, enum machine_mode mode)
3385 return general_operand (op, mode);
3386 if (nonimmediate_operand (op, mode))
3388 return x86_64_sign_extended_value (op);
3391 /* Return nonzero if OP is a general operand representable on x86_64
3392 as either a sign extended or zero extended constant. */
3395 x86_64_szext_general_operand (rtx op, enum machine_mode mode)
3398 return general_operand (op, mode);
3399 if (nonimmediate_operand (op, mode))
3401 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3404 /* Return nonzero if OP is a nonmemory operand representable on x86_64. */
3407 x86_64_nonmemory_operand (rtx op, enum machine_mode mode)
3410 return nonmemory_operand (op, mode);
3411 if (register_operand (op, mode))
3413 return x86_64_sign_extended_value (op);
3416 /* Return nonzero if OP is a nonmemory operand acceptable to the movabs patterns. */
3419 x86_64_movabs_operand (rtx op, enum machine_mode mode)
3421 if (!TARGET_64BIT || !flag_pic)
3422 return nonmemory_operand (op, mode);
3423 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3425 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3430 /* Return nonzero if OPNUM's MEM should be matched
3431 in movabs* patterns. */
3434 ix86_check_movabs (rtx insn, int opnum)
3438 set = PATTERN (insn);
3439 if (GET_CODE (set) == PARALLEL)
3440 set = XVECEXP (set, 0, 0);
3441 if (GET_CODE (set) != SET)
3443 mem = XEXP (set, opnum);
3444 while (GET_CODE (mem) == SUBREG)
3445 mem = SUBREG_REG (mem);
3446 if (GET_CODE (mem) != MEM)
3448 return (volatile_ok || !MEM_VOLATILE_P (mem));
3451 /* Return nonzero if OP is a nonmemory operand representable on x86_64. */
3454 x86_64_szext_nonmemory_operand (rtx op, enum machine_mode mode)
3457 return nonmemory_operand (op, mode);
3458 if (register_operand (op, mode))
3460 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3463 /* Return nonzero if OP is an immediate operand representable on x86_64. */
3466 x86_64_immediate_operand (rtx op, enum machine_mode mode)
3469 return immediate_operand (op, mode);
3470 return x86_64_sign_extended_value (op);
3473 /* Return nonzero if OP is an immediate operand representable on x86_64 as a zero extended constant. */
3476 x86_64_zext_immediate_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3478 return x86_64_zero_extended_value (op);
3481 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3482 for shift & compare patterns, as shifting by 0 does not change flags),
3483 else return zero. */
3486 const_int_1_31_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3488 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3491 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3492 reference and a constant. */
3495 symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3497 switch (GET_CODE (op))
3505 if (GET_CODE (op) == SYMBOL_REF
3506 || GET_CODE (op) == LABEL_REF
3507 || (GET_CODE (op) == UNSPEC
3508 && (XINT (op, 1) == UNSPEC_GOT
3509 || XINT (op, 1) == UNSPEC_GOTOFF
3510 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3512 if (GET_CODE (op) != PLUS
3513 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3517 if (GET_CODE (op) == SYMBOL_REF
3518 || GET_CODE (op) == LABEL_REF)
3520 /* Only @GOTOFF gets offsets. */
3521 if (GET_CODE (op) != UNSPEC
3522 || XINT (op, 1) != UNSPEC_GOTOFF)
3525 op = XVECEXP (op, 0, 0);
3526 if (GET_CODE (op) == SYMBOL_REF
3527 || GET_CODE (op) == LABEL_REF)
3536 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3539 pic_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3541 if (GET_CODE (op) != CONST)
3546 if (GET_CODE (op) == UNSPEC
3547 && XINT (op, 1) == UNSPEC_GOTPCREL)
3549 if (GET_CODE (op) == PLUS
3550 && GET_CODE (XEXP (op, 0)) == UNSPEC
3551 && XINT (XEXP (op, 0), 1) == UNSPEC_GOTPCREL)
3556 if (GET_CODE (op) == UNSPEC)
3558 if (GET_CODE (op) != PLUS
3559 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3562 if (GET_CODE (op) == UNSPEC)
3568 /* Return true if OP is a symbolic operand that resolves locally. */
3571 local_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3573 if (GET_CODE (op) == CONST
3574 && GET_CODE (XEXP (op, 0)) == PLUS
3575 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3576 op = XEXP (XEXP (op, 0), 0);
3578 if (GET_CODE (op) == LABEL_REF)
3581 if (GET_CODE (op) != SYMBOL_REF)
3584 if (SYMBOL_REF_LOCAL_P (op))
3587 /* There is, however, a not insubstantial body of code in the rest of
3588 the compiler that assumes it can just stick the results of
3589 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3590 /* ??? This is a hack. Should update the body of the compiler to
3591 always create a DECL and invoke targetm.encode_section_info. */
3592 if (strncmp (XSTR (op, 0), internal_label_prefix,
3593 internal_label_prefix_len) == 0)
3599 /* Test for various thread-local symbols. */
3602 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3604 if (GET_CODE (op) != SYMBOL_REF)
3606 return SYMBOL_REF_TLS_MODEL (op);
3610 tls_symbolic_operand_1 (rtx op, enum tls_model kind)
3612 if (GET_CODE (op) != SYMBOL_REF)
3614 return SYMBOL_REF_TLS_MODEL (op) == kind;
3618 global_dynamic_symbolic_operand (rtx op,
3619 enum machine_mode mode ATTRIBUTE_UNUSED)
3621 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3625 local_dynamic_symbolic_operand (rtx op,
3626 enum machine_mode mode ATTRIBUTE_UNUSED)
3628 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3632 initial_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3634 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3638 local_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3640 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3643 /* Test for a valid operand for a call instruction. Don't allow the
3644 arg pointer register or virtual regs since they may decay into
3645 reg + const, which the patterns can't handle. */
3648 call_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3650 /* Disallow indirection through a virtual register. This leads to
3651 compiler aborts when trying to eliminate them. */
3652 if (GET_CODE (op) == REG
3653 && (op == arg_pointer_rtx
3654 || op == frame_pointer_rtx
3655 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3656 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3659 /* Disallow `call 1234'. Due to varying assembler lameness this
3660 gets either rejected or translated to `call .+1234'. */
3661 if (GET_CODE (op) == CONST_INT)
3664 /* Explicitly allow SYMBOL_REF even if pic. */
3665 if (GET_CODE (op) == SYMBOL_REF)
3668 /* Otherwise we can allow any general_operand in the address. */
3669 return general_operand (op, Pmode);
3672 /* Test for a valid operand for a call instruction. Don't allow the
3673 arg pointer register or virtual regs since they may decay into
3674 reg + const, which the patterns can't handle. */
3677 sibcall_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3679 /* Disallow indirection through a virtual register. This leads to
3680 compiler aborts when trying to eliminate them. */
3681 if (GET_CODE (op) == REG
3682 && (op == arg_pointer_rtx
3683 || op == frame_pointer_rtx
3684 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3685 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3688 /* Explicitly allow SYMBOL_REF even if pic. */
3689 if (GET_CODE (op) == SYMBOL_REF)
3692 /* Otherwise we can only allow register operands. */
3693 return register_operand (op, Pmode);
3697 constant_call_address_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3699 if (GET_CODE (op) == CONST
3700 && GET_CODE (XEXP (op, 0)) == PLUS
3701 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3702 op = XEXP (XEXP (op, 0), 0);
3703 return GET_CODE (op) == SYMBOL_REF;
3706 /* Match exactly zero and one. */
3709 const0_operand (rtx op, enum machine_mode mode)
3711 return op == CONST0_RTX (mode);
3715 const1_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3717 return op == const1_rtx;
3720 /* Match 2, 4, or 8. Used for leal multiplicands. */
3723 const248_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3725 return (GET_CODE (op) == CONST_INT
3726 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
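/* Illustrative sketch: 2, 4 and 8 are exactly the scales the addressing
   modes accept, so a multiply by 5 can be done with one lea,
   e.g. "leal (%eax,%eax,4), %eax".  */
#if 0
int times5 (int x) { return x * 5; }   /* x + 4*x via a single lea */
#endif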
3730 const_0_to_3_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3732 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 4);
3736 const_0_to_7_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3738 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 8);
3742 const_0_to_15_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3744 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 16);
3748 const_0_to_255_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3750 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 256);
3754 /* True if this is a constant appropriate for an increment or decrement. */
3757 incdec_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3759 /* On the Pentium 4, the inc and dec operations cause an extra dependency on
3760 the flags register, since the carry flag is not set. */
3761 if (TARGET_PENTIUM4 && !optimize_size)
3763 return op == const1_rtx || op == constm1_rtx;
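/* Illustrative sketch: when tuning for the Pentium 4 (and not
   optimizing for size), the predicate above rejects +/-1, so the
   increment below is emitted as "addl $1, %eax" rather than "incl".  */
#if 0
int bump (int x) { return x + 1; }
#endif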
3766 /* Return nonzero if OP is acceptable as an operand of the DImode shift expander. */
3770 shiftdi_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3773 return nonimmediate_operand (op, mode);
3775 return register_operand (op, mode);
3778 /* Return false if this is the stack pointer, or any other fake
3779 register eliminable to the stack pointer. Otherwise, this is a register operand.
3782 This is used to prevent esp from being used as an index reg,
3783 which would only happen in pathological cases. */
3786 reg_no_sp_operand (rtx op, enum machine_mode mode)
3789 if (GET_CODE (t) == SUBREG)
3791 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3794 return register_operand (op, mode);
3798 mmx_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3800 return MMX_REG_P (op);
3803 /* Return false if this is any eliminable register. Otherwise work like general_operand. */
3807 general_no_elim_operand (rtx op, enum machine_mode mode)
3810 if (GET_CODE (t) == SUBREG)
3812 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3813 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3814 || t == virtual_stack_dynamic_rtx)
3817 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3818 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3821 return general_operand (op, mode);
3824 /* Return false if this is any eliminable register. Otherwise
3825 register_operand or const_int. */
3828 nonmemory_no_elim_operand (rtx op, enum machine_mode mode)
3831 if (GET_CODE (t) == SUBREG)
3833 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3834 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3835 || t == virtual_stack_dynamic_rtx)
3838 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3841 /* Return false if this is any eliminable register or stack register,
3842 otherwise work like register_operand. */
3845 index_register_operand (rtx op, enum machine_mode mode)
3848 if (GET_CODE (t) == SUBREG)
3852 if (t == arg_pointer_rtx
3853 || t == frame_pointer_rtx
3854 || t == virtual_incoming_args_rtx
3855 || t == virtual_stack_vars_rtx
3856 || t == virtual_stack_dynamic_rtx
3857 || REGNO (t) == STACK_POINTER_REGNUM)
3860 return general_operand (op, mode);
3863 /* Return true if op is a Q_REGS class register. */
3866 q_regs_operand (rtx op, enum machine_mode mode)
3868 if (mode != VOIDmode && GET_MODE (op) != mode)
3870 if (GET_CODE (op) == SUBREG)
3871 op = SUBREG_REG (op);
3872 return ANY_QI_REG_P (op);
3875 /* Return true if op is a flags register. */
3878 flags_reg_operand (rtx op, enum machine_mode mode)
3880 if (mode != VOIDmode && GET_MODE (op) != mode)
3882 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3885 /* Return true if op is a NON_Q_REGS class register. */
3888 non_q_regs_operand (rtx op, enum machine_mode mode)
3890 if (mode != VOIDmode && GET_MODE (op) != mode)
3892 if (GET_CODE (op) == SUBREG)
3893 op = SUBREG_REG (op);
3894 return NON_QI_REG_P (op);
3898 zero_extended_scalar_load_operand (rtx op,
3899 enum machine_mode mode ATTRIBUTE_UNUSED)
3902 if (GET_CODE (op) != MEM)
3904 op = maybe_get_pool_constant (op);
3907 if (GET_CODE (op) != CONST_VECTOR)
3910 (GET_MODE_SIZE (GET_MODE (op)) /
3911 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
3912 for (n_elts--; n_elts > 0; n_elts--)
3914 rtx elt = CONST_VECTOR_ELT (op, n_elts);
3915 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
3921 /* Return 1 when OP is an operand acceptable for a standard SSE move. */
3923 vector_move_operand (rtx op, enum machine_mode mode)
3925 if (nonimmediate_operand (op, mode))
3927 if (GET_MODE (op) != mode && mode != VOIDmode)
3929 return (op == CONST0_RTX (GET_MODE (op)));
3932 /* Return true if OP is a valid address and does not contain
3933 a segment override. */
3936 no_seg_address_operand (rtx op, enum machine_mode mode)
3938 struct ix86_address parts;
3940 if (! address_operand (op, mode))
3943 if (! ix86_decompose_address (op, &parts))
3946 return parts.seg == SEG_DEFAULT;
3949 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS insns. */
3952 sse_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3954 enum rtx_code code = GET_CODE (op);
3957 /* Operations supported directly. */
3967 /* These are equivalent to the ones above in non-IEEE comparisons. */
3974 return !TARGET_IEEE_FP;
3981 /* Return 1 if OP is a valid comparison operator in a valid mode. */
3981 ix86_comparison_operator (rtx op, enum machine_mode mode)
3983 enum machine_mode inmode;
3984 enum rtx_code code = GET_CODE (op);
3985 if (mode != VOIDmode && GET_MODE (op) != mode)
3987 if (GET_RTX_CLASS (code) != '<')
3989 inmode = GET_MODE (XEXP (op, 0));
3991 if (inmode == CCFPmode || inmode == CCFPUmode)
3993 enum rtx_code second_code, bypass_code;
3994 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3995 return (bypass_code == NIL && second_code == NIL);
4002 if (inmode == CCmode || inmode == CCGCmode
4003 || inmode == CCGOCmode || inmode == CCNOmode)
4006 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
4007 if (inmode == CCmode)
4011 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
4019 /* Return 1 if OP is a valid comparison operator testing the carry flag to be set. */
4022 ix86_carry_flag_operator (rtx op, enum machine_mode mode)
4024 enum machine_mode inmode;
4025 enum rtx_code code = GET_CODE (op);
4027 if (mode != VOIDmode && GET_MODE (op) != mode)
4029 if (GET_RTX_CLASS (code) != '<')
4031 inmode = GET_MODE (XEXP (op, 0));
4032 if (GET_CODE (XEXP (op, 0)) != REG
4033 || REGNO (XEXP (op, 0)) != FLAGS_REG
4034 || XEXP (op, 1) != const0_rtx)
4037 if (inmode == CCFPmode || inmode == CCFPUmode)
4039 enum rtx_code second_code, bypass_code;
4041 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4042 if (bypass_code != NIL || second_code != NIL)
4044 code = ix86_fp_compare_code_to_integer (code);
4046 else if (inmode != CCmode)
4051 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
4054 fcmov_comparison_operator (rtx op, enum machine_mode mode)
4056 enum machine_mode inmode;
4057 enum rtx_code code = GET_CODE (op);
4059 if (mode != VOIDmode && GET_MODE (op) != mode)
4061 if (GET_RTX_CLASS (code) != '<')
4063 inmode = GET_MODE (XEXP (op, 0));
4064 if (inmode == CCFPmode || inmode == CCFPUmode)
4066 enum rtx_code second_code, bypass_code;
4068 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4069 if (bypass_code != NIL || second_code != NIL)
4071 code = ix86_fp_compare_code_to_integer (code);
4073 /* The i387 supports only a limited set of condition codes. */
4076 case LTU: case GTU: case LEU: case GEU:
4077 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
4080 case ORDERED: case UNORDERED:
4088 /* Return 1 if OP is a binary operator that can be promoted to a wider mode. */
4091 promotable_binary_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4093 switch (GET_CODE (op))
4096 /* Modern CPUs have the same latency for HImode and SImode multiply,
4097 but the 386 and 486 do HImode multiply faster. */
4098 return ix86_tune > PROCESSOR_I486;
4110 /* Nearly general operand, but accept any const_double, since we wish
4111 to be able to drop them into memory rather than have them get pulled into a register. */
4115 cmp_fp_expander_operand (rtx op, enum machine_mode mode)
4117 if (mode != VOIDmode && mode != GET_MODE (op))
4119 if (GET_CODE (op) == CONST_DOUBLE)
4121 return general_operand (op, mode);
4124 /* Match an SI or HImode register for a zero_extract. */
4127 ext_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4130 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
4131 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
4134 if (!register_operand (op, VOIDmode))
4137 /* Be careful to accept only registers having upper parts. */
4138 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
4139 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
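/* Illustrative example (assuming the usual i386 numbering, where hard
   registers 0..3 are %eax, %edx, %ecx and %ebx): the register inside
   (zero_extract:SI (reg:SI %eax) (const_int 8) (const_int 8)) names
   %ah, which is encodable only because %eax has an upper byte part.
   Hard registers 4 and up (%esi, %edi, ...) have no such part; pseudos
   (regno > LAST_VIRTUAL_REGISTER) are accepted since reload can still
   assign them to a register that does.  */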
4142 /* Return 1 if this is a valid binary floating-point operation.
4143 OP is the expression matched, and MODE is its mode. */
4146 binary_fp_operator (rtx op, enum machine_mode mode)
4148 if (mode != VOIDmode && mode != GET_MODE (op))
4151 switch (GET_CODE (op))
4157 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
4165 mult_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4167 return GET_CODE (op) == MULT;
4171 div_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4173 return GET_CODE (op) == DIV;
4177 arith_or_logical_operator (rtx op, enum machine_mode mode)
4179 return ((mode == VOIDmode || GET_MODE (op) == mode)
4180 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
4181 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
4184 /* Returns 1 if OP is a memory operand with a displacement. */
4187 memory_displacement_operand (rtx op, enum machine_mode mode)
4189 struct ix86_address parts;
4191 if (! memory_operand (op, mode))
4194 if (! ix86_decompose_address (XEXP (op, 0), &parts))
4197 return parts.disp != NULL_RTX;
4200 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
4201 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4203 ??? It seems likely that this will only work because cmpsi is an
4204 expander, and no actual insns use this. */
4207 cmpsi_operand (rtx op, enum machine_mode mode)
4209 if (nonimmediate_operand (op, mode))
4212 if (GET_CODE (op) == AND
4213 && GET_MODE (op) == SImode
4214 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
4215 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
4216 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
4217 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
4218 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
4219 && GET_CODE (XEXP (op, 1)) == CONST_INT)
4225 /* Returns 1 if OP is a memory operand that cannot be represented by the modRM array. */
4229 long_memory_operand (rtx op, enum machine_mode mode)
4231 if (! memory_operand (op, mode))
4234 return memory_address_length (op) != 0;
4237 /* Return nonzero if the rtx is known to be aligned. */
4240 aligned_operand (rtx op, enum machine_mode mode)
4242 struct ix86_address parts;
4244 if (!general_operand (op, mode))
4247 /* Registers and immediate operands are always "aligned". */
4248 if (GET_CODE (op) != MEM)
4251 /* Don't even try to do any aligned optimizations with volatiles. */
4252 if (MEM_VOLATILE_P (op))
4257 /* Pushes and pops are only valid on the stack pointer. */
4258 if (GET_CODE (op) == PRE_DEC
4259 || GET_CODE (op) == POST_INC)
4262 /* Decode the address. */
4263 if (! ix86_decompose_address (op, &parts))
4266 /* Look for some component that isn't known to be aligned. */
4270 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
4275 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
4280 if (GET_CODE (parts.disp) != CONST_INT
4281 || (INTVAL (parts.disp) & 3) != 0)
4285 /* Didn't find one -- this must be an aligned address. */
4289 /* Initialize the table of extra 80387 mathematical constants. */
4292 init_ext_80387_constants (void)
4294 static const char * cst[5] =
4296 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4297 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4298 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4299 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4300 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4304 for (i = 0; i < 5; i++)
4306 real_from_string (&ext_80387_constants_table[i], cst[i]);
4307 /* Ensure each constant is rounded to XFmode precision. */
4308 real_convert (&ext_80387_constants_table[i],
4309 XFmode, &ext_80387_constants_table[i]);
4312 ext_80387_constants_init = 1;
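/* For reference (a sketch matching the table comments above), the x87
   instructions that load these values push, respectively:
       fldlg2   ; log10(2)
       fldln2   ; ln(2)
       fldl2e   ; log2(e)
       fldl2t   ; log2(10)
       fldpi    ; pi
   so a CONST_DOUBLE identical to a table entry can be materialized
   without going through memory.  */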
4315 /* Return true if the constant is something that can be loaded with
4316 a special instruction. */
4319 standard_80387_constant_p (rtx x)
4321 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4324 if (x == CONST0_RTX (GET_MODE (x)))
4326 if (x == CONST1_RTX (GET_MODE (x)))
4329 /* For XFmode constants, try to find a special 80387 instruction on
4330 those CPUs that benefit from them. */
4331 if (GET_MODE (x) == XFmode
4332 && x86_ext_80387_constants & TUNEMASK)
4337 if (! ext_80387_constants_init)
4338 init_ext_80387_constants ();
4340 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4341 for (i = 0; i < 5; i++)
4342 if (real_identical (&r, &ext_80387_constants_table[i]))
4349 /* Return the opcode of the special instruction to be used to load the constant X. */
4353 standard_80387_constant_opcode (rtx x)
4355 switch (standard_80387_constant_p (x))
4375 /* Return the CONST_DOUBLE representing the 80387 constant that is
4376 loaded by the specified special instruction. The argument IDX
4377 matches the return value from standard_80387_constant_p. */
4380 standard_80387_constant_rtx (int idx)
4384 if (! ext_80387_constants_init)
4385 init_ext_80387_constants ();
4401 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4405 /* Return 1 if X is an FP constant we can load into an SSE register without using memory. */
4408 standard_sse_constant_p (rtx x)
4410 if (x == const0_rtx)
4412 return (x == CONST0_RTX (GET_MODE (x)));
4415 /* Returns 1 if OP contains a symbol reference. */
4418 symbolic_reference_mentioned_p (rtx op)
4423 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4426 fmt = GET_RTX_FORMAT (GET_CODE (op));
4427 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4433 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4434 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4438 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4445 /* Return 1 if it is appropriate to emit `ret' instructions in the
4446 body of a function. Do this only if the epilogue is simple, needing a
4447 couple of insns. Prior to reloading, we can't tell how many registers
4448 must be saved, so return 0 then. Return 0 if there is no frame
4449 marker to de-allocate.
4451 If NON_SAVING_SETJMP is defined and true, then it is not possible
4452 for the epilogue to be simple, so return 0. This is a special case
4453 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4454 until final, but jump_optimize may need to know sooner if a `return' is OK. */
4458 ix86_can_use_return_insn_p (void)
4460 struct ix86_frame frame;
4462 #ifdef NON_SAVING_SETJMP
4463 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4467 if (! reload_completed || frame_pointer_needed)
4470 /* Don't allow popping 32K bytes or more of arguments, since
4471 that's all we can do with one instruction. */
4472 if (current_function_pops_args
4473 && current_function_args_size >= 32768)
4476 ix86_compute_frame_layout (&frame);
4477 return frame.to_allocate == 0 && frame.nregs == 0;
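/* Worked example (illustrative): a leaf function with no locals, no
   saved registers and no frame pointer computes frame.to_allocate == 0
   and frame.nregs == 0, so its whole epilogue is a bare
       ret
   while anything that must free a frame or restore registers needs the
   full epilogue expansion below.  */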
4480 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
4482 x86_64_sign_extended_value (rtx value)
4484 switch (GET_CODE (value))
4486 /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
4487 to be at least 32, and thus all acceptable constants are
4488 represented as CONST_INT. */
4490 if (HOST_BITS_PER_WIDE_INT == 32)
4494 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4495 return trunc_int_for_mode (val, SImode) == val;
4499 /* For certain code models, the symbolic references are known to fit:
4500 in the CM_SMALL_PIC model we know it fits if it is local to the shared
4501 library. Don't count TLS SYMBOL_REFs here, since they should fit
4502 only if inside of an UNSPEC handled below. */
4504 /* TLS symbols are not constant. */
4505 if (tls_symbolic_operand (value, Pmode))
4507 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4509 /* For certain code models, the code is near as well. */
4511 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4512 || ix86_cmodel == CM_KERNEL);
4514 /* We may also accept the offsetted memory references in certain special cases. */
4517 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4518 switch (XINT (XEXP (value, 0), 1))
4520 case UNSPEC_GOTPCREL:
4522 case UNSPEC_GOTNTPOFF:
4528 if (GET_CODE (XEXP (value, 0)) == PLUS)
4530 rtx op1 = XEXP (XEXP (value, 0), 0);
4531 rtx op2 = XEXP (XEXP (value, 0), 1);
4532 HOST_WIDE_INT offset;
4534 if (ix86_cmodel == CM_LARGE)
4536 if (GET_CODE (op2) != CONST_INT)
4538 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4539 switch (GET_CODE (op1))
4542 /* For CM_SMALL assume that the latest object is 16MB below the
4543 end of the 31-bit boundary. We may also accept fairly
4544 large negative constants, knowing that all objects are
4545 in the positive half of the address space. */
4546 if (ix86_cmodel == CM_SMALL
4547 && offset < 16*1024*1024
4548 && trunc_int_for_mode (offset, SImode) == offset)
4550 /* For CM_KERNEL we know that all objects reside in the
4551 negative half of the 32-bit address space. We may not
4552 accept negative offsets, since they may fall just outside
4553 that range, but we may accept fairly large positive ones. */
4554 if (ix86_cmodel == CM_KERNEL
4556 && trunc_int_for_mode (offset, SImode) == offset)
4560 /* These conditions are similar to SYMBOL_REF ones, just the
4561 constraints for code models differ. */
4562 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4563 && offset < 16*1024*1024
4564 && trunc_int_for_mode (offset, SImode) == offset)
4566 if (ix86_cmodel == CM_KERNEL
4568 && trunc_int_for_mode (offset, SImode) == offset)
4572 switch (XINT (op1, 1))
4577 && trunc_int_for_mode (offset, SImode) == offset)
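/* Worked example (illustrative): the 64-bit constant -0x80000000,
   i.e. 0xffffffff80000000, satisfies
   trunc_int_for_mode (val, SImode) == val and therefore fits the
   sign-extended imm32 field, while +0x80000000 does not -- it
   truncates to -0x80000000 -- and must be loaded another way.  */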
4591 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
4593 x86_64_zero_extended_value (rtx value)
4595 switch (GET_CODE (value))
4598 if (HOST_BITS_PER_WIDE_INT == 32)
4599 return (GET_MODE (value) == VOIDmode
4600 && !CONST_DOUBLE_HIGH (value));
4604 if (HOST_BITS_PER_WIDE_INT == 32)
4605 return INTVAL (value) >= 0;
4607 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4610 /* For certain code models, the symbolic references are known to fit. */
4612 /* TLS symbols are not constant. */
4613 if (tls_symbolic_operand (value, Pmode))
4615 return ix86_cmodel == CM_SMALL;
4617 /* For certain code models, the code is near as well. */
4619 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4621 /* We may also accept the offsetted memory references in certain special cases. */
4624 if (GET_CODE (XEXP (value, 0)) == PLUS)
4626 rtx op1 = XEXP (XEXP (value, 0), 0);
4627 rtx op2 = XEXP (XEXP (value, 0), 1);
4629 if (ix86_cmodel == CM_LARGE)
4631 switch (GET_CODE (op1))
4635 /* For the small code model we may accept fairly large positive
4636 offsets, since one bit is available for free. Negative
4637 offsets are limited by the size of the NULL pointer area
4638 specified by the ABI. */
4639 if (ix86_cmodel == CM_SMALL
4640 && GET_CODE (op2) == CONST_INT
4641 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4642 && (trunc_int_for_mode (INTVAL (op2), SImode)
4645 /* ??? For the kernel, we may accept adjustment of
4646 -0x10000000, since we know that it will just convert
4647 negative address space to positive, but perhaps this
4648 is not worthwhile. */
4651 /* These conditions are similar to SYMBOL_REF ones, just the
4652 constraints for code models differ. */
4653 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4654 && GET_CODE (op2) == CONST_INT
4655 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4656 && (trunc_int_for_mode (INTVAL (op2), SImode)
4670 /* Value should be nonzero if functions must have frame pointers.
4671 Zero means the frame pointer need not be set up (and parms may
4672 be accessed via the stack pointer) in functions that seem suitable. */
4675 ix86_frame_pointer_required (void)
4677 /* If we accessed previous frames, then the generated code expects
4678 to be able to access the saved ebp value in our frame. */
4679 if (cfun->machine->accesses_prev_frame)
4682 /* Several x86 OSes need a frame pointer for other reasons,
4683 usually pertaining to setjmp. */
4684 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4687 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4688 the frame pointer by default. Turn it back on now if we've not
4689 got a leaf function. */
4690 if (TARGET_OMIT_LEAF_FRAME_POINTER
4691 && (!current_function_is_leaf))
4694 if (current_function_profile)
4700 /* Record that the current function accesses previous call frames. */
4703 ix86_setup_frame_addresses (void)
4705 cfun->machine->accesses_prev_frame = 1;
4708 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4709 # define USE_HIDDEN_LINKONCE 1
4711 # define USE_HIDDEN_LINKONCE 0
4714 static int pic_labels_used;
4716 /* Fills in the label name that should be used for a pc thunk for
4717 the given register. */
4720 get_pc_thunk_name (char name[32], unsigned int regno)
4722 if (USE_HIDDEN_LINKONCE)
4723 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4725 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
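/* For example (assuming the default i386 register names, where
   reg_names[3] is "bx"): with USE_HIDDEN_LINKONCE the thunk for %ebx
   is named "__i686.get_pc_thunk.bx"; otherwise an internal label is
   generated from the "LPR" prefix and the register number.  */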
4729 /* For -fpic, output the pc thunks still needed at the end of the file:
4730 each loads its register with the caller's return address and then returns. */
4733 ix86_file_end (void)
4738 for (regno = 0; regno < 8; ++regno)
4742 if (! ((pic_labels_used >> regno) & 1))
4745 get_pc_thunk_name (name, regno);
4747 if (USE_HIDDEN_LINKONCE)
4751 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4753 TREE_PUBLIC (decl) = 1;
4754 TREE_STATIC (decl) = 1;
4755 DECL_ONE_ONLY (decl) = 1;
4757 (*targetm.asm_out.unique_section) (decl, 0);
4758 named_section (decl, NULL, 0);
4760 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4761 fputs ("\t.hidden\t", asm_out_file);
4762 assemble_name (asm_out_file, name);
4763 fputc ('\n', asm_out_file);
4764 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4769 ASM_OUTPUT_LABEL (asm_out_file, name);
4772 xops[0] = gen_rtx_REG (SImode, regno);
4773 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4774 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4775 output_asm_insn ("ret", xops);
4778 if (NEED_INDICATE_EXEC_STACK)
4779 file_end_indicate_exec_stack ();
4782 /* Emit code for the SET_GOT patterns. */
4785 output_set_got (rtx dest)
4790 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4792 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4794 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4797 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4799 output_asm_insn ("call\t%a2", xops);
4802 /* Output the "canonical" label name ("Lxx$pb") here too. This
4803 is what will be referred to by the Mach-O PIC subsystem. */
4804 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4806 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4807 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4810 output_asm_insn ("pop{l}\t%0", xops);
4815 get_pc_thunk_name (name, REGNO (dest));
4816 pic_labels_used |= 1 << REGNO (dest);
4818 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4819 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4820 output_asm_insn ("call\t%X2", xops);
4823 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4824 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4825 else if (!TARGET_MACHO)
4826 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
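/* The sequences emitted above look roughly like this (illustrative
   only; exact spelling depends on the target assembler):
       call    __i686.get_pc_thunk.bx
       addl    $_GLOBAL_OFFSET_TABLE_, %ebx
   with deep branch prediction, and
       call    1f
   1:  popl    %ebx
       addl    $_GLOBAL_OFFSET_TABLE_+[.-1b], %ebx
   without it; either way the GOT address ends up in the destination
   register.  */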
4831 /* Generate a "push" pattern for input ARG. */
4836 return gen_rtx_SET (VOIDmode,
4838 gen_rtx_PRE_DEC (Pmode,
4839 stack_pointer_rtx)),
4843 /* Return >= 0 if there is an unused call-clobbered register available
4844 for the entire function. */
4847 ix86_select_alt_pic_regnum (void)
4849 if (current_function_is_leaf && !current_function_profile)
4852 for (i = 2; i >= 0; --i)
4853 if (!regs_ever_live[i])
4857 return INVALID_REGNUM;
4860 /* Return 1 if we need to save REGNO. */
4862 ix86_save_reg (unsigned int regno, int maybe_eh_return)
4864 if (pic_offset_table_rtx
4865 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4866 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4867 || current_function_profile
4868 || current_function_calls_eh_return
4869 || current_function_uses_const_pool))
4871 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4876 if (current_function_calls_eh_return && maybe_eh_return)
4881 unsigned test = EH_RETURN_DATA_REGNO (i);
4882 if (test == INVALID_REGNUM)
4889 return (regs_ever_live[regno]
4890 && !call_used_regs[regno]
4891 && !fixed_regs[regno]
4892 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4895 /* Return number of registers to be saved on the stack. */
4898 ix86_nsaved_regs (void)
4903 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4904 if (ix86_save_reg (regno, true))
4909 /* Return the offset between two registers, one to be eliminated, and the other
4910 its replacement, at the start of a routine. */
4913 ix86_initial_elimination_offset (int from, int to)
4915 struct ix86_frame frame;
4916 ix86_compute_frame_layout (&frame);
4918 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4919 return frame.hard_frame_pointer_offset;
4920 else if (from == FRAME_POINTER_REGNUM
4921 && to == HARD_FRAME_POINTER_REGNUM)
4922 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4925 if (to != STACK_POINTER_REGNUM)
4927 else if (from == ARG_POINTER_REGNUM)
4928 return frame.stack_pointer_offset;
4929 else if (from != FRAME_POINTER_REGNUM)
4932 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4936 /* Fill the ix86_frame structure with info about the frame of the current function. */
4939 ix86_compute_frame_layout (struct ix86_frame *frame)
4941 HOST_WIDE_INT total_size;
4942 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4943 HOST_WIDE_INT offset;
4944 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4945 HOST_WIDE_INT size = get_frame_size ();
4947 frame->nregs = ix86_nsaved_regs ();
4950 /* During reload iteration the number of registers saved can change.
4951 Recompute the value as needed. Do not recompute when the number of
4952 registers didn't change, as reload does multiple calls to the function
4953 and does not expect the decision to change within a single iteration. */
4955 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4957 int count = frame->nregs;
4959 cfun->machine->use_fast_prologue_epilogue_nregs = count;
4960 /* The fast prologue uses move instead of push to save registers. This
4961 is significantly longer, but also executes faster as modern hardware
4962 can execute the moves in parallel, but can't do that for push/pop.
4964 Be careful about choosing which prologue to emit: when the function takes
4965 many instructions to execute, we may use the slow version, as well as
4966 when the function is known to be outside a hot spot (this is known with
4967 feedback only). Weight the size of the function by the number of registers
4968 to save, as it is cheap to use one or two push instructions but very
4969 slow to use many of them. */
4971 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4972 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4973 || (flag_branch_probabilities
4974 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4975 cfun->machine->use_fast_prologue_epilogue = false;
4977 cfun->machine->use_fast_prologue_epilogue
4978 = !expensive_function_p (count);
4980 if (TARGET_PROLOGUE_USING_MOVE
4981 && cfun->machine->use_fast_prologue_epilogue)
4982 frame->save_regs_using_mov = true;
4984 frame->save_regs_using_mov = false;
4987 /* Skip return address and saved base pointer. */
4988 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4990 frame->hard_frame_pointer_offset = offset;
4992 /* Do some sanity checking of stack_alignment_needed and
4993 preferred_alignment, since the i386 port is the only one using these
4994 features, and they may break easily. */
4996 if (size && !stack_alignment_needed)
4998 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
5000 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5002 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5005 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5006 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5008 /* Register save area */
5009 offset += frame->nregs * UNITS_PER_WORD;
5012 if (ix86_save_varrargs_registers)
5014 offset += X86_64_VARARGS_SIZE;
5015 frame->va_arg_size = X86_64_VARARGS_SIZE;
5018 frame->va_arg_size = 0;
5020 /* Align start of frame for local function. */
5021 frame->padding1 = ((offset + stack_alignment_needed - 1)
5022 & -stack_alignment_needed) - offset;
5024 offset += frame->padding1;
5026 /* Frame pointer points here. */
5027 frame->frame_pointer_offset = offset;
5031 /* Add the outgoing arguments area. This can be skipped if we eliminated
5032 all the function calls as dead code.
5033 Skipping is, however, impossible when the function calls alloca, as the
5034 alloca expander assumes that the last current_function_outgoing_args_size
5035 bytes of the stack frame are unused. */
5036 if (ACCUMULATE_OUTGOING_ARGS
5037 && (!current_function_is_leaf || current_function_calls_alloca))
5039 offset += current_function_outgoing_args_size;
5040 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5043 frame->outgoing_arguments_size = 0;
5045 /* Align the stack boundary. Only needed if we're calling another function or using alloca. */
5047 if (!current_function_is_leaf || current_function_calls_alloca)
5048 frame->padding2 = ((offset + preferred_alignment - 1)
5049 & -preferred_alignment) - offset;
5051 frame->padding2 = 0;
5053 offset += frame->padding2;
5055 /* We've reached the end of the stack frame. */
5056 frame->stack_pointer_offset = offset;
5058 /* Size the prologue needs to allocate. */
5059 frame->to_allocate =
5060 (size + frame->padding1 + frame->padding2
5061 + frame->outgoing_arguments_size + frame->va_arg_size);
5063 if ((!frame->to_allocate && frame->nregs <= 1)
5064 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5065 frame->save_regs_using_mov = false;
5067 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5068 && current_function_is_leaf)
5070 frame->red_zone_size = frame->to_allocate;
5071 if (frame->save_regs_using_mov)
5072 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5073 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5074 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5077 frame->red_zone_size = 0;
5078 frame->to_allocate -= frame->red_zone_size;
5079 frame->stack_pointer_offset -= frame->red_zone_size;
5081 fprintf (stderr, "nregs: %i\n", frame->nregs);
5082 fprintf (stderr, "size: %i\n", size);
5083 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5084 fprintf (stderr, "padding1: %i\n", frame->padding1);
5085 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5086 fprintf (stderr, "padding2: %i\n", frame->padding2);
5087 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5088 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5089 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5090 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5091 frame->hard_frame_pointer_offset);
5092 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
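/* Sketch of the layout computed above (illustrative; the stack grows
   down):
       <incoming return address>
       [saved %ebp]                   <- hard_frame_pointer_offset
       [frame->nregs saved registers]
       [va_arg_size register save area]
       [padding1]                     <- frame_pointer_offset
       [get_frame_size () bytes of locals]
       [outgoing_arguments_size]
       [padding2]                     <- stack_pointer_offset
   to_allocate covers everything below the register save slots, i.e.
   what the prologue must subtract from the stack pointer.  */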
5096 /* Emit code to save registers in the prologue. */
5099 ix86_emit_save_regs (void)
5104 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5105 if (ix86_save_reg (regno, true))
5107 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5108 RTX_FRAME_RELATED_P (insn) = 1;
5112 /* Emit code to save registers using MOV insns. The first register
5113 is stored at POINTER + OFFSET. */
5115 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5120 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5121 if (ix86_save_reg (regno, true))
5123 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5125 gen_rtx_REG (Pmode, regno));
5126 RTX_FRAME_RELATED_P (insn) = 1;
5127 offset += UNITS_PER_WORD;
5131 /* Expand prologue or epilogue stack adjustment.
5132 The pattern exists to put a dependency on all ebp-based memory accesses.
5133 STYLE should be negative if instructions should be marked as frame related,
5134 zero if the %r11 register is live and cannot be freely used, and positive otherwise. */
5138 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5143 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5144 else if (x86_64_immediate_operand (offset, DImode))
5145 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5149 /* r11 is used by indirect sibcall return as well, set before the
5150 epilogue and used after the epilogue. ATM indirect sibcall
5151 shouldn't be used together with huge frame sizes in one
5152 function because of the frame_size check in sibcall.c. */
5155 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5156 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5158 RTX_FRAME_RELATED_P (insn) = 1;
5159 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5163 RTX_FRAME_RELATED_P (insn) = 1;
5166 /* Expand the prologue into a bunch of separate insns. */
5169 ix86_expand_prologue (void)
5173 struct ix86_frame frame;
5174 HOST_WIDE_INT allocate;
5176 ix86_compute_frame_layout (&frame);
5178 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5179 slower on all targets. Also sdb doesn't like it. */
5181 if (frame_pointer_needed)
5183 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5184 RTX_FRAME_RELATED_P (insn) = 1;
5186 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5187 RTX_FRAME_RELATED_P (insn) = 1;
5190 allocate = frame.to_allocate;
5192 if (!frame.save_regs_using_mov)
5193 ix86_emit_save_regs ();
5195 allocate += frame.nregs * UNITS_PER_WORD;
5197 /* When using the red zone we may start register saving before allocating
5198 the stack frame, saving one cycle of the prologue. */
5199 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5200 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5201 : stack_pointer_rtx,
5202 -frame.nregs * UNITS_PER_WORD);
5206 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5207 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5208 GEN_INT (-allocate), -1);
5211 /* Only valid for Win32. */
5212 rtx eax = gen_rtx_REG (SImode, 0);
5213 bool eax_live = ix86_eax_live_at_start_p ();
5220 emit_insn (gen_push (eax));
5224 insn = emit_move_insn (eax, GEN_INT (allocate));
5225 RTX_FRAME_RELATED_P (insn) = 1;
5227 insn = emit_insn (gen_allocate_stack_worker (eax));
5228 RTX_FRAME_RELATED_P (insn) = 1;
5232 rtx t = plus_constant (stack_pointer_rtx, allocate);
5233 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5237 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5239 if (!frame_pointer_needed || !frame.to_allocate)
5240 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5242 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5243 -frame.nregs * UNITS_PER_WORD);
5246 pic_reg_used = false;
5247 if (pic_offset_table_rtx
5248 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5249 || current_function_profile))
5251 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5253 if (alt_pic_reg_used != INVALID_REGNUM)
5254 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5256 pic_reg_used = true;
5261 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5263 /* Even with accurate pre-reload life analysis, we can wind up
5264 deleting all references to the pic register after reload.
5265 Consider if cross-jumping unifies two sides of a branch
5266 controlled by a comparison vs the only read from a global.
5267 In which case, allow the set_got to be deleted, though we're
5268 too late to do anything about the ebx save in the prologue. */
5269 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5272 /* Prevent function calls from being scheduled before the call to mcount.
5273 In the pic_reg_used case, make sure that the got load isn't deleted. */
5274 if (current_function_profile)
5275 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
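/* Putting the pieces together, a typical prologue produced by the
   expansion above looks something like (illustrative):
       pushl   %ebp
       movl    %esp, %ebp
       pushl   %ebx                  ; push-style register save
       subl    $N, %esp              ; N == frame.to_allocate
   with the move-based variant instead storing registers relative to
   the frame after the stack adjustment.  */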
5278 /* Emit code to restore saved registers using MOV insns. First register
5279 is restored from POINTER + OFFSET. */
5281 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5282 int maybe_eh_return)
5285 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5287 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5288 if (ix86_save_reg (regno, maybe_eh_return))
5290 /* Ensure that adjust_address won't be forced to produce a pointer
5291 outside the range allowed by the x86-64 instruction set. */
5292 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5296 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5297 emit_move_insn (r11, GEN_INT (offset));
5298 emit_insn (gen_adddi3 (r11, r11, pointer));
5299 base_address = gen_rtx_MEM (Pmode, r11);
5302 emit_move_insn (gen_rtx_REG (Pmode, regno),
5303 adjust_address (base_address, Pmode, offset));
5304 offset += UNITS_PER_WORD;
5308 /* Restore function stack, frame, and registers. */
5311 ix86_expand_epilogue (int style)
5314 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5315 struct ix86_frame frame;
5316 HOST_WIDE_INT offset;
5318 ix86_compute_frame_layout (&frame);
5320 /* Calculate start of saved registers relative to ebp. Special care
5321 must be taken for the normal return case of a function using
5322 eh_return: the eax and edx registers are marked as saved, but not
5323 restored along this path. */
5324 offset = frame.nregs;
5325 if (current_function_calls_eh_return && style != 2)
5327 offset *= -UNITS_PER_WORD;
5329 /* If we're only restoring one register and sp is not valid, then
5330 use a move instruction to restore the register, since it's
5331 less work than reloading sp and popping the register.
5333 The default code results in a stack adjustment using add/lea instructions,
5334 while this code results in a LEAVE instruction (or discrete equivalent),
5335 so it is profitable in some other cases as well, especially when there
5336 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5337 is set and there is exactly one register to pop. This heuristic may need
5338 some tuning in the future. */
5339 if ((!sp_valid && frame.nregs <= 1)
5340 || (TARGET_EPILOGUE_USING_MOVE
5341 && cfun->machine->use_fast_prologue_epilogue
5342 && (frame.nregs > 1 || frame.to_allocate))
5343 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5344 || (frame_pointer_needed && TARGET_USE_LEAVE
5345 && cfun->machine->use_fast_prologue_epilogue
5346 && frame.nregs == 1)
5347 || current_function_calls_eh_return)
5349 /* Restore registers. We can use ebp or esp to address the memory
5350 locations. If both are available, default to ebp, since offsets
5351 are known to be small. The only exception is esp pointing directly to the
5352 end of the block of saved registers, where we may simplify the addressing mode. */
5355 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5356 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5357 frame.to_allocate, style == 2);
5359 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5360 offset, style == 2);
5362 /* eh_return epilogues need %ecx added to the stack pointer. */
5365 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5367 if (frame_pointer_needed)
5369 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5370 tmp = plus_constant (tmp, UNITS_PER_WORD);
5371 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5373 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5374 emit_move_insn (hard_frame_pointer_rtx, tmp);
5376 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5381 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5382 tmp = plus_constant (tmp, (frame.to_allocate
5383 + frame.nregs * UNITS_PER_WORD));
5384 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5387 else if (!frame_pointer_needed)
5388 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5389 GEN_INT (frame.to_allocate
5390 + frame.nregs * UNITS_PER_WORD),
5392 /* If not an i386, mov & pop is faster than "leave". */
5393 else if (TARGET_USE_LEAVE || optimize_size
5394 || !cfun->machine->use_fast_prologue_epilogue)
5395 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5398 pro_epilogue_adjust_stack (stack_pointer_rtx,
5399 hard_frame_pointer_rtx,
5402 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5404 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5409 /* First step is to deallocate the stack frame so that we can
5410 pop the registers. */
5413 if (!frame_pointer_needed)
5415 pro_epilogue_adjust_stack (stack_pointer_rtx,
5416 hard_frame_pointer_rtx,
5417 GEN_INT (offset), style);
5419 else if (frame.to_allocate)
5420 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5421 GEN_INT (frame.to_allocate), style);
5423 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5424 if (ix86_save_reg (regno, false))
5427 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5429 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5431 if (frame_pointer_needed)
5433 /* Leave results in shorter dependency chains on CPUs that are
5434 able to grok it fast. */
5435 if (TARGET_USE_LEAVE)
5436 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5437 else if (TARGET_64BIT)
5438 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5440 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5444 /* Sibcall epilogues don't want a return instruction. */
5448 if (current_function_pops_args && current_function_args_size)
5450 rtx popc = GEN_INT (current_function_pops_args);
5452 /* i386 can only pop 64K bytes. If asked to pop more, pop the
5453 return address, do an explicit add, and jump indirectly to the caller. */
5456 if (current_function_pops_args >= 65536)
5458 rtx ecx = gen_rtx_REG (SImode, 2);
5460 /* There is no "pascal" calling convention in the 64-bit ABI. */
5464 emit_insn (gen_popsi1 (ecx));
5465 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5466 emit_jump_insn (gen_return_indirect_internal (ecx));
5469 emit_jump_insn (gen_return_pop_internal (popc));
5472 emit_jump_insn (gen_return_internal ());
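/* For the >= 64K case above, the emitted tail is roughly (POPC here
   stands for the current_function_pops_args amount):
       popl    %ecx                  ; fetch the return address
       addl    $POPC, %esp           ; pop the arguments
       jmp     *%ecx                 ; return to the caller  */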
5475 /* Reset from the function's potential modifications. */
5478 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5479 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5481 if (pic_offset_table_rtx)
5482 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5485 /* Extract the parts of an RTL expression that is a valid memory address
5486 for an instruction. Return 0 if the structure of the address is
5487 grossly off. Return -1 if the address contains ASHIFT, so it is not
5488 strictly valid, but is still used for computing the length of an lea instruction. */
5491 ix86_decompose_address (rtx addr, struct ix86_address *out)
5493 rtx base = NULL_RTX;
5494 rtx index = NULL_RTX;
5495 rtx disp = NULL_RTX;
5496 HOST_WIDE_INT scale = 1;
5497 rtx scale_rtx = NULL_RTX;
5499 enum ix86_address_seg seg = SEG_DEFAULT;
5501 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5503 else if (GET_CODE (addr) == PLUS)
5513 addends[n++] = XEXP (op, 1);
5516 while (GET_CODE (op) == PLUS);
5521 for (i = n; i >= 0; --i)
5524 switch (GET_CODE (op))
5529 index = XEXP (op, 0);
5530 scale_rtx = XEXP (op, 1);
5534 if (XINT (op, 1) == UNSPEC_TP
5535 && TARGET_TLS_DIRECT_SEG_REFS
5536 && seg == SEG_DEFAULT)
5537 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5566 else if (GET_CODE (addr) == MULT)
5568 index = XEXP (addr, 0); /* index*scale */
5569 scale_rtx = XEXP (addr, 1);
5571 else if (GET_CODE (addr) == ASHIFT)
5575 /* We're called for lea too, which implements ashift on occasion. */
5576 index = XEXP (addr, 0);
5577 tmp = XEXP (addr, 1);
5578 if (GET_CODE (tmp) != CONST_INT)
5580 scale = INTVAL (tmp);
5581 if ((unsigned HOST_WIDE_INT) scale > 3)
5587 disp = addr; /* displacement */
5589 /* Extract the integral value of scale. */
5592 if (GET_CODE (scale_rtx) != CONST_INT)
5594 scale = INTVAL (scale_rtx);
5597 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
5598 if (base && index && scale == 1
5599 && (index == arg_pointer_rtx
5600 || index == frame_pointer_rtx
5601 || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
5608 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5609 if ((base == hard_frame_pointer_rtx
5610 || base == frame_pointer_rtx
5611 || base == arg_pointer_rtx) && !disp)
5614 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5615 Avoid this by transforming to [%esi+0]. */
5616 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5617 && base && !index && !disp
5619 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5622 /* Special case: encode reg+reg instead of reg*2. */
5623 if (!base && index && scale && scale == 2)
5624 base = index, scale = 1;
5626 /* Special case: scaling cannot be encoded without base or displacement. */
5627 if (!base && !disp && index && scale != 1)
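/* Worked example (illustrative): the address
       (plus:SI (reg:SI %ebx)
                (plus:SI (mult:SI (reg:SI %ecx) (const_int 4))
                         (const_int 12)))
   decomposes into base == %ebx, index == %ecx, scale == 4 and
   disp == 12, i.e. the operand written as "12(%ebx,%ecx,4)".  */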
5639 /* Return the cost of the memory address x.
5640 For i386, it is better to use a complex address than let gcc copy
5641 the address into a reg and make a new pseudo. But not if the address
5642 requires two regs - that would mean more pseudos with longer lifetimes. */
5645 ix86_address_cost (rtx x)
5647 struct ix86_address parts;
5650 if (!ix86_decompose_address (x, &parts))
5653 /* More complex memory references are better. */
5654 if (parts.disp && parts.disp != const0_rtx)
5656 if (parts.seg != SEG_DEFAULT)
5659 /* Attempt to minimize number of registers in the address. */
5661 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5663 && (!REG_P (parts.index)
5664 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5668 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5670 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5671 && parts.base != parts.index)
5674 /* The AMD-K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
5675 since its predecode logic can't detect the length of such instructions
5676 and they degenerate to vector decoded. Increase the cost of such
5677 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5678 to split such addresses or even refuse them entirely.
5680 The following addressing modes are affected:
5685 [base+scale*index], [scale*index+disp], [base+index].
5686 The first and last cases may be avoidable by explicitly coding the zero in the address, but I don't have an AMD-K6 machine handy to check this theory. */
5690 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5691 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5692 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5698 /* If X is a machine specific address (i.e. a symbol or label being
5699 referenced as a displacement from the GOT implemented using an
5700 UNSPEC), then return the base term. Otherwise return X. */
5703 ix86_find_base_term (rtx x)
5709 if (GET_CODE (x) != CONST)
5712 if (GET_CODE (term) == PLUS
5713 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5714 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5715 term = XEXP (term, 0);
5716 if (GET_CODE (term) != UNSPEC
5717 || XINT (term, 1) != UNSPEC_GOTPCREL)
5720 term = XVECEXP (term, 0, 0);
5722 if (GET_CODE (term) != SYMBOL_REF
5723 && GET_CODE (term) != LABEL_REF)
5729 term = ix86_delegitimize_address (x);
5731 if (GET_CODE (term) != SYMBOL_REF
5732 && GET_CODE (term) != LABEL_REF)
5738 /* Determine if a given RTX is a valid constant. We already know this
5739 satisfies CONSTANT_P. */
5742 legitimate_constant_p (rtx x)
5746 switch (GET_CODE (x))
5749 /* TLS symbols are not constant. */
5750 if (tls_symbolic_operand (x, Pmode))
5755 inner = XEXP (x, 0);
5757 /* Offsets of TLS symbols are never valid.
5758 Discourage CSE from creating them. */
5759 if (GET_CODE (inner) == PLUS
5760 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5763 if (GET_CODE (inner) == PLUS)
5765 if (GET_CODE (XEXP (inner, 1)) != CONST_INT)
5767 inner = XEXP (inner, 0);
5770 /* Only some unspecs are valid as "constants". */
5771 if (GET_CODE (inner) == UNSPEC)
5772 switch (XINT (inner, 1))
5776 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5778 return local_dynamic_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5788 /* Otherwise we handle everything else in the move patterns. */
5792 /* Determine if it's legal to put X into the constant pool. This
5793 is not possible for the address of thread-local symbols, which
5794 is checked above. */
5797 ix86_cannot_force_const_mem (rtx x)
5799 return !legitimate_constant_p (x);
5802 /* Determine if a given RTX is a valid constant address. */
5805 constant_address_p (rtx x)
5807 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5810 /* Nonzero if the constant value X is a legitimate general operand
5811 when generating PIC code. It is given that flag_pic is on and
5812 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5815 legitimate_pic_operand_p (rtx x)
5819 switch (GET_CODE (x))
5822 inner = XEXP (x, 0);
5824 /* Only some unspecs are valid as "constants". */
5825 if (GET_CODE (inner) == UNSPEC)
5826 switch (XINT (inner, 1))
5829 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5837 return legitimate_pic_address_disp_p (x);
5844 /* Determine if a given CONST RTX is a valid memory displacement in PIC mode. */
5848 legitimate_pic_address_disp_p (rtx disp)
5852 /* In 64bit mode we can allow direct addresses of symbols and labels
5853 when they are not dynamic symbols. */
5856 /* TLS references should always be enclosed in UNSPEC. */
5857 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5859 if (GET_CODE (disp) == SYMBOL_REF
5860 && ix86_cmodel == CM_SMALL_PIC
5861 && SYMBOL_REF_LOCAL_P (disp))
5863 if (GET_CODE (disp) == LABEL_REF)
5865 if (GET_CODE (disp) == CONST
5866 && GET_CODE (XEXP (disp, 0)) == PLUS)
5868 rtx op0 = XEXP (XEXP (disp, 0), 0);
5869 rtx op1 = XEXP (XEXP (disp, 0), 1);
5871 /* TLS references should always be enclosed in UNSPEC. */
5872 if (tls_symbolic_operand (op0, GET_MODE (op0)))
5874 if (((GET_CODE (op0) == SYMBOL_REF
5875 && ix86_cmodel == CM_SMALL_PIC
5876 && SYMBOL_REF_LOCAL_P (op0))
5877 || GET_CODE (op0) == LABEL_REF)
5878 && GET_CODE (op1) == CONST_INT
5879 && INTVAL (op1) < 16*1024*1024
5880 && INTVAL (op1) >= -16*1024*1024)
5884 if (GET_CODE (disp) != CONST)
5886 disp = XEXP (disp, 0);
5890 /* It is unsafe to allow PLUS expressions here; refusing them limits the
5891 allowed distance of GOT table references, which we should not need anyway. */
5892 if (GET_CODE (disp) != UNSPEC
5893 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5896 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5897 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5903 if (GET_CODE (disp) == PLUS)
5905 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5907 disp = XEXP (disp, 0);
5911 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5912 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5914 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5915 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5916 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5918 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5919 if (! strcmp (sym_name, "<pic base>"))
5924 if (GET_CODE (disp) != UNSPEC)
5927 switch (XINT (disp, 1))
5932 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5934 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
5935 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
5936 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5938 case UNSPEC_GOTTPOFF:
5939 case UNSPEC_GOTNTPOFF:
5940 case UNSPEC_INDNTPOFF:
5943 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5945 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5947 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5953 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5954 memory address for an instruction. The MODE argument is the machine mode
5955 for the MEM expression that wants to use this address.
5957 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
5958 convert common non-canonical forms to canonical form so that they will be recognized. */
5962 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
5964 struct ix86_address parts;
5965 rtx base, index, disp;
5966 HOST_WIDE_INT scale;
5967 const char *reason = NULL;
5968 rtx reason_rtx = NULL_RTX;
5970 if (TARGET_DEBUG_ADDR)
5973 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5974 GET_MODE_NAME (mode), strict);
5978 if (ix86_decompose_address (addr, &parts) <= 0)
5980 reason = "decomposition failed";
5985 index = parts.index;
5987 scale = parts.scale;
5989 /* Validate base register.
5991 Don't allow SUBREGs here; it can lead to spill failures when the base
5992 is one word out of a two word structure, which is represented internally as a DImode int. */
5999 if (GET_CODE (base) != REG)
6001 reason = "base is not a register";
6005 if (GET_MODE (base) != Pmode)
6007 reason = "base is not in Pmode";
6011 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
6012 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
6014 reason = "base is not valid";
6019 /* Validate index register.
6021 Don't allow SUBREGs here; it can lead to spill failures when the index
6022 is one word out of a two word structure, which is represented internally as a DImode int. */
6029 if (GET_CODE (index) != REG)
6031 reason = "index is not a register";
6035 if (GET_MODE (index) != Pmode)
6037 reason = "index is not in Pmode";
6041 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
6042 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
6044 reason = "index is not valid";
6049 /* Validate scale factor. */
6052 reason_rtx = GEN_INT (scale);
6055 reason = "scale without index";
6059 if (scale != 2 && scale != 4 && scale != 8)
6061 reason = "scale is not a valid multiplier";
6066 /* Validate displacement. */
6071 if (GET_CODE (disp) == CONST
6072 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6073 switch (XINT (XEXP (disp, 0), 1))
6077 case UNSPEC_GOTPCREL:
6080 goto is_legitimate_pic;
6082 case UNSPEC_GOTTPOFF:
6083 case UNSPEC_GOTNTPOFF:
6084 case UNSPEC_INDNTPOFF:
6090 reason = "invalid address unspec";
6094 else if (flag_pic && (SYMBOLIC_CONST (disp)
6096 && !machopic_operand_p (disp)
6101 if (TARGET_64BIT && (index || base))
6103 /* foo@dtpoff(%rX) is ok. */
6104 if (GET_CODE (disp) != CONST
6105 || GET_CODE (XEXP (disp, 0)) != PLUS
6106 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6107 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6108 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6109 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6111 reason = "non-constant pic memory reference";
6115 else if (! legitimate_pic_address_disp_p (disp))
6117 reason = "displacement is an invalid pic construct";
6121 /* This code used to verify that a symbolic pic displacement
6122 includes the pic_offset_table_rtx register.
6124 While this is a good idea, unfortunately these constructs may
6125 be created by the "adds using lea" optimization for incorrect code.
6134 Such code is nonsensical, but results in addressing the
6135 GOT table with a pic_offset_table_rtx base. We can't
6136 just refuse it easily, since it gets matched by the
6137 "addsi3" pattern, which later gets split to lea in the
6138 case where the output register differs from the input. While this
6139 could be handled by a separate addsi pattern for this case
6140 that never results in lea, disabling this test seems to be the
6141 easier and correct fix for the crash. */
6143 else if (GET_CODE (disp) != LABEL_REF
6144 && GET_CODE (disp) != CONST_INT
6145 && (GET_CODE (disp) != CONST
6146 || !legitimate_constant_p (disp))
6147 && (GET_CODE (disp) != SYMBOL_REF
6148 || !legitimate_constant_p (disp)))
6150 reason = "displacement is not constant";
6153 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
6155 reason = "displacement is out of range";
6160 /* Everything looks valid. */
6161 if (TARGET_DEBUG_ADDR)
6162 fprintf (stderr, "Success.\n");
6166 if (TARGET_DEBUG_ADDR)
6168 fprintf (stderr, "Error: %s\n", reason);
6169 debug_rtx (reason_rtx);
6174 /* Return a unique alias set for the GOT. */
6176 static HOST_WIDE_INT
6177 ix86_GOT_alias_set (void)
6179 static HOST_WIDE_INT set = -1;
6181 set = new_alias_set ();
6185 /* Return a legitimate reference for ORIG (an address) using the
6186 register REG. If REG is 0, a new pseudo is generated.
6188 There are two types of references that must be handled:
6190 1. Global data references must load the address from the GOT, via
6191 the PIC reg. An insn is emitted to do this load, and the reg is
6194 2. Static data references, constant pool addresses, and code labels
6195 compute the address as an offset from the GOT, whose base is in
6196 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6197 differentiate them from global data objects. The returned
6198 address is the PIC reg + an unspec constant.
6200 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6201 reg also appears in the address. */
6204 legitimize_pic_address (rtx orig, rtx reg)
6212 reg = gen_reg_rtx (Pmode);
6213 /* Use the generic Mach-O PIC machinery. */
6214 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6217 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6219 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6221 /* This symbol may be referenced via a displacement from the PIC
6222 base address (@GOTOFF). */
6224 if (reload_in_progress)
6225 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6226 if (GET_CODE (addr) == CONST)
6227 addr = XEXP (addr, 0);
6228 if (GET_CODE (addr) == PLUS)
6230 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6231 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6234 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6235 new = gen_rtx_CONST (Pmode, new);
6236 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6240 emit_move_insn (reg, new);
6244 else if (GET_CODE (addr) == SYMBOL_REF)
6248 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6249 new = gen_rtx_CONST (Pmode, new);
6250 new = gen_rtx_MEM (Pmode, new);
6251 RTX_UNCHANGING_P (new) = 1;
6252 set_mem_alias_set (new, ix86_GOT_alias_set ());
6255 reg = gen_reg_rtx (Pmode);
6256 /* Use gen_movsi directly; otherwise the address is loaded
6257 into a register for CSE. We don't want to CSE these addresses;
6258 instead we CSE addresses from the GOT table, so skip this. */
6259 emit_insn (gen_movsi (reg, new));
6264 /* This symbol must be referenced via a load from the
6265 Global Offset Table (@GOT). */
6267 if (reload_in_progress)
6268 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6269 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6270 new = gen_rtx_CONST (Pmode, new);
6271 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6272 new = gen_rtx_MEM (Pmode, new);
6273 RTX_UNCHANGING_P (new) = 1;
6274 set_mem_alias_set (new, ix86_GOT_alias_set ());
6277 reg = gen_reg_rtx (Pmode);
6278 emit_move_insn (reg, new);
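/* The load built above prints as something like (illustrative)
       movl    foo@GOT(%ebx), %reg
   i.e. the symbol's address is fetched from its GOT slot instead of
   being computed as an offset from the PIC base.  */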
6284 if (GET_CODE (addr) == CONST)
6286 addr = XEXP (addr, 0);
6288 /* We must match stuff we generate before. Assume the only
6289 unspecs that can get here are ours. Not that we could do
6290 anything with them anyway.... */
6291 if (GET_CODE (addr) == UNSPEC
6292 || (GET_CODE (addr) == PLUS
6293 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6295 if (GET_CODE (addr) != PLUS)
6298 if (GET_CODE (addr) == PLUS)
6300 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6302 /* Check first to see if this is a constant offset from a @GOTOFF
6303 symbol reference. */
6304 if (local_symbolic_operand (op0, Pmode)
6305 && GET_CODE (op1) == CONST_INT)
6309 if (reload_in_progress)
6310 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6311 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6313 new = gen_rtx_PLUS (Pmode, new, op1);
6314 new = gen_rtx_CONST (Pmode, new);
6315 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6319 emit_move_insn (reg, new);
6325 if (INTVAL (op1) < -16*1024*1024
6326 || INTVAL (op1) >= 16*1024*1024)
6327 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
6332 base = legitimize_pic_address (XEXP (addr, 0), reg);
6333 new = legitimize_pic_address (XEXP (addr, 1),
6334 base == reg ? NULL_RTX : reg);
6336 if (GET_CODE (new) == CONST_INT)
6337 new = plus_constant (base, INTVAL (new));
6340 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6342 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6343 new = XEXP (new, 1);
6345 new = gen_rtx_PLUS (Pmode, base, new);
6353 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6356 get_thread_pointer (int to_reg)
6360 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6364 reg = gen_reg_rtx (Pmode);
6365 insn = gen_rtx_SET (VOIDmode, reg, tp);
6366 insn = emit_insn (insn);
6371 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6372 false if we expect this to be used for a memory address and true if
6373 we expect to load the address into a register. */
6376 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6378 rtx dest, base, off, pic;
6383 case TLS_MODEL_GLOBAL_DYNAMIC:
6384 dest = gen_reg_rtx (Pmode);
6387 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6390 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6391 insns = get_insns ();
6394 emit_libcall_block (insns, dest, rax, x);
6397 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6400 case TLS_MODEL_LOCAL_DYNAMIC:
6401 base = gen_reg_rtx (Pmode);
6404 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6407 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6408 insns = get_insns ();
6411 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6412 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6413 emit_libcall_block (insns, base, rax, note);
6416 emit_insn (gen_tls_local_dynamic_base_32 (base));
6418 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6419 off = gen_rtx_CONST (Pmode, off);
6421 return gen_rtx_PLUS (Pmode, base, off);
6423 case TLS_MODEL_INITIAL_EXEC:
6427 type = UNSPEC_GOTNTPOFF;
6431 if (reload_in_progress)
6432 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6433 pic = pic_offset_table_rtx;
6434 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6436 else if (!TARGET_GNU_TLS)
6438 pic = gen_reg_rtx (Pmode);
6439 emit_insn (gen_set_got (pic));
6440 type = UNSPEC_GOTTPOFF;
6445 type = UNSPEC_INDNTPOFF;
6448 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6449 off = gen_rtx_CONST (Pmode, off);
6451 off = gen_rtx_PLUS (Pmode, pic, off);
6452 off = gen_rtx_MEM (Pmode, off);
6453 RTX_UNCHANGING_P (off) = 1;
6454 set_mem_alias_set (off, ix86_GOT_alias_set ());
6456 if (TARGET_64BIT || TARGET_GNU_TLS)
6458 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6459 off = force_reg (Pmode, off);
6460 return gen_rtx_PLUS (Pmode, base, off);
6464 base = get_thread_pointer (true);
6465 dest = gen_reg_rtx (Pmode);
6466 emit_insn (gen_subsi3 (dest, base, off));
6470 case TLS_MODEL_LOCAL_EXEC:
6471 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6472 (TARGET_64BIT || TARGET_GNU_TLS)
6473 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6474 off = gen_rtx_CONST (Pmode, off);
6476 if (TARGET_64BIT || TARGET_GNU_TLS)
6478 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6479 return gen_rtx_PLUS (Pmode, base, off);
6483 base = get_thread_pointer (true);
6484 dest = gen_reg_rtx (Pmode);
6485 emit_insn (gen_subsi3 (dest, base, off));
6496 /* Try machine-dependent ways of modifying an illegitimate address
6497 to be legitimate. If we find one, return the new, valid address.
6498 This macro is used in only one place: `memory_address' in explow.c.
6500 OLDX is the address as it was before break_out_memory_refs was called.
6501 In some cases it is useful to look at this to decide what needs to be done.
6503 MODE and WIN are passed so that this macro can use
6504 GO_IF_LEGITIMATE_ADDRESS.
6506 It is always safe for this macro to do nothing. It exists to recognize
6507 opportunities to optimize the output.
6509 For the 80386, we handle X+REG by loading X into a register R and
6510 using R+REG. R will go in a general reg and indexing will be used.
6511 However, if REG is a broken-out memory address or multiplication,
6512 nothing needs to be done because REG can certainly go in a general reg.
6514 When -fpic is used, special handling is needed for symbolic references.
6515 See comments by legitimize_pic_address in i386.c for details. */
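/* As an illustration (the register number is arbitrary):
   (plus (symbol_ref "x") (reg:SI 58)) is handled by loading the
   symbolic part into a fresh register and using that register as
   the base of a base+index address. */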
6518 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
6523 if (TARGET_DEBUG_ADDR)
6525 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6526 GET_MODE_NAME (mode));
6530 log = tls_symbolic_operand (x, mode);
6532 return legitimize_tls_address (x, log, false);
6534 if (flag_pic && SYMBOLIC_CONST (x))
6535 return legitimize_pic_address (x, 0);
/* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6538 if (GET_CODE (x) == ASHIFT
6539 && GET_CODE (XEXP (x, 1)) == CONST_INT
6540 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6543 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6544 GEN_INT (1 << log));
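/* e.g. (ashift (reg) (const_int 3)) becomes (mult (reg) (const_int 8)),
   which matches the scaled-index part of an address. */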
6547 if (GET_CODE (x) == PLUS)
6549 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6551 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6552 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6553 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6556 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6557 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6558 GEN_INT (1 << log));
6561 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6562 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6563 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6566 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6567 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6568 GEN_INT (1 << log));
6571 /* Put multiply first if it isn't already. */
6572 if (GET_CODE (XEXP (x, 1)) == MULT)
6574 rtx tmp = XEXP (x, 0);
6575 XEXP (x, 0) = XEXP (x, 1);
6580 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6581 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6582 created by virtual register instantiation, register elimination, and
6583 similar optimizations. */
6584 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6587 x = gen_rtx_PLUS (Pmode,
6588 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6589 XEXP (XEXP (x, 1), 0)),
6590 XEXP (XEXP (x, 1), 1));
/* Canonicalize
   (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
   into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6596 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6597 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6598 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6599 && CONSTANT_P (XEXP (x, 1)))
6602 rtx other = NULL_RTX;
6604 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6606 constant = XEXP (x, 1);
6607 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6609 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6611 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6612 other = XEXP (x, 1);
6620 x = gen_rtx_PLUS (Pmode,
6621 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6622 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6623 plus_constant (other, INTVAL (constant)));
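/* e.g. (plus (plus (mult (reg) (const_int 4)) (plus (reg) (const_int 8)))
   (const_int 4)) becomes
   (plus (plus (mult (reg) (const_int 4)) (reg)) (const_int 12)). */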
6627 if (changed && legitimate_address_p (mode, x, FALSE))
6630 if (GET_CODE (XEXP (x, 0)) == MULT)
6633 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6636 if (GET_CODE (XEXP (x, 1)) == MULT)
6639 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6643 && GET_CODE (XEXP (x, 1)) == REG
6644 && GET_CODE (XEXP (x, 0)) == REG)
6647 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6650 x = legitimize_pic_address (x, 0);
6653 if (changed && legitimate_address_p (mode, x, FALSE))
6656 if (GET_CODE (XEXP (x, 0)) == REG)
6658 rtx temp = gen_reg_rtx (Pmode);
6659 rtx val = force_operand (XEXP (x, 1), temp);
6661 emit_move_insn (temp, val);
6667 else if (GET_CODE (XEXP (x, 1)) == REG)
6669 rtx temp = gen_reg_rtx (Pmode);
6670 rtx val = force_operand (XEXP (x, 0), temp);
6672 emit_move_insn (temp, val);
6682 /* Print an integer constant expression in assembler syntax. Addition
6683 and subtraction are the only arithmetic that may appear in these
6684 expressions. FILE is the stdio stream to write to, X is the rtx, and
6685 CODE is the operand print code from the output string. */
6688 output_pic_addr_const (FILE *file, rtx x, int code)
6692 switch (GET_CODE (x))
6702 assemble_name (file, XSTR (x, 0));
6703 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
6704 fputs ("@PLT", file);
6711 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6712 assemble_name (asm_out_file, buf);
6716 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6720 /* This used to output parentheses around the expression,
6721 but that does not work on the 386 (either ATT or BSD assembler). */
6722 output_pic_addr_const (file, XEXP (x, 0), code);
6726 if (GET_MODE (x) == VOIDmode)
6728 /* We can use %d if the number is <32 bits and positive. */
6729 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6730 fprintf (file, "0x%lx%08lx",
6731 (unsigned long) CONST_DOUBLE_HIGH (x),
6732 (unsigned long) CONST_DOUBLE_LOW (x));
6734 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6737 /* We can't handle floating point constants;
6738 PRINT_OPERAND must handle them. */
6739 output_operand_lossage ("floating constant misused");
6743 /* Some assemblers need integer constants to appear first. */
6744 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6746 output_pic_addr_const (file, XEXP (x, 0), code);
6748 output_pic_addr_const (file, XEXP (x, 1), code);
6750 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6752 output_pic_addr_const (file, XEXP (x, 1), code);
6754 output_pic_addr_const (file, XEXP (x, 0), code);
6762 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6763 output_pic_addr_const (file, XEXP (x, 0), code);
6765 output_pic_addr_const (file, XEXP (x, 1), code);
6767 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6771 if (XVECLEN (x, 0) != 1)
6773 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6774 switch (XINT (x, 1))
6777 fputs ("@GOT", file);
6780 fputs ("@GOTOFF", file);
6782 case UNSPEC_GOTPCREL:
6783 fputs ("@GOTPCREL(%rip)", file);
6785 case UNSPEC_GOTTPOFF:
6786 /* FIXME: This might be @TPOFF in Sun ld too. */
6787 fputs ("@GOTTPOFF", file);
6790 fputs ("@TPOFF", file);
6794 fputs ("@TPOFF", file);
6796 fputs ("@NTPOFF", file);
6799 fputs ("@DTPOFF", file);
6801 case UNSPEC_GOTNTPOFF:
6803 fputs ("@GOTTPOFF(%rip)", file);
6805 fputs ("@GOTNTPOFF", file);
6807 case UNSPEC_INDNTPOFF:
6808 fputs ("@INDNTPOFF", file);
6811 output_operand_lossage ("invalid UNSPEC as operand");
6817 output_operand_lossage ("invalid expression as operand");
6821 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6822 We need to handle our special PIC relocations. */
6825 i386_dwarf_output_addr_const (FILE *file, rtx x)
6828 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6832 fprintf (file, "%s", ASM_LONG);
6835 output_pic_addr_const (file, x, '\0');
6837 output_addr_const (file, x);
6841 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6842 We need to emit DTP-relative relocations. */
6845 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
6847 fputs (ASM_LONG, file);
6848 output_addr_const (file, x);
6849 fputs ("@DTPOFF", file);
6855 fputs (", 0", file);
6862 /* In the name of slightly smaller debug output, and to cater to
general assembler lossage, recognize PIC+GOTOFF and turn it back
6864 into a direct symbol reference. */
6867 ix86_delegitimize_address (rtx orig_x)
6871 if (GET_CODE (x) == MEM)
6876 if (GET_CODE (x) != CONST
6877 || GET_CODE (XEXP (x, 0)) != UNSPEC
6878 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6879 || GET_CODE (orig_x) != MEM)
6881 return XVECEXP (XEXP (x, 0), 0, 0);
6884 if (GET_CODE (x) != PLUS
6885 || GET_CODE (XEXP (x, 1)) != CONST)
6888 if (GET_CODE (XEXP (x, 0)) == REG
6889 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6890 /* %ebx + GOT/GOTOFF */
6892 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6894 /* %ebx + %reg * scale + GOT/GOTOFF */
6896 if (GET_CODE (XEXP (y, 0)) == REG
6897 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6899 else if (GET_CODE (XEXP (y, 1)) == REG
6900 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6904 if (GET_CODE (y) != REG
6905 && GET_CODE (y) != MULT
6906 && GET_CODE (y) != ASHIFT)
6912 x = XEXP (XEXP (x, 1), 0);
6913 if (GET_CODE (x) == UNSPEC
6914 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6915 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6918 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6919 return XVECEXP (x, 0, 0);
6922 if (GET_CODE (x) == PLUS
6923 && GET_CODE (XEXP (x, 0)) == UNSPEC
6924 && GET_CODE (XEXP (x, 1)) == CONST_INT
6925 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6926 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6927 && GET_CODE (orig_x) != MEM)))
6929 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6931 return gen_rtx_PLUS (Pmode, y, x);
6939 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
6944 if (mode == CCFPmode || mode == CCFPUmode)
6946 enum rtx_code second_code, bypass_code;
6947 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6948 if (bypass_code != NIL || second_code != NIL)
6950 code = ix86_fp_compare_code_to_integer (code);
6954 code = reverse_condition (code);
6965 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
/* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
   Those same assemblers have the same but opposite lossage on cmov. */
6974 suffix = fp ? "nbe" : "a";
6977 if (mode == CCNOmode || mode == CCGOCmode)
6979 else if (mode == CCmode || mode == CCGCmode)
6990 if (mode == CCNOmode || mode == CCGOCmode)
6992 else if (mode == CCmode || mode == CCGCmode)
7001 suffix = fp ? "nb" : "ae";
7004 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
7014 suffix = fp ? "u" : "p";
7017 suffix = fp ? "nu" : "np";
7022 fputs (suffix, file);
7025 /* Print the name of register X to FILE based on its machine mode and number.
7026 If CODE is 'w', pretend the mode is HImode.
7027 If CODE is 'b', pretend the mode is QImode.
7028 If CODE is 'k', pretend the mode is SImode.
7029 If CODE is 'q', pretend the mode is DImode.
7030 If CODE is 'h', pretend the reg is the `high' byte register.
7031 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
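/* Thus, for the register otherwise printed as "ax": CODE 'b' gives "al",
   'w' gives "ax", 'k' gives "eax", 'q' gives "rax" and 'h' gives "ah". */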
7034 print_reg (rtx x, int code, FILE *file)
7036 if (REGNO (x) == ARG_POINTER_REGNUM
7037 || REGNO (x) == FRAME_POINTER_REGNUM
7038 || REGNO (x) == FLAGS_REG
7039 || REGNO (x) == FPSR_REG)
7042 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7045 if (code == 'w' || MMX_REG_P (x))
7047 else if (code == 'b')
7049 else if (code == 'k')
7051 else if (code == 'q')
7053 else if (code == 'y')
7055 else if (code == 'h')
7058 code = GET_MODE_SIZE (GET_MODE (x));
/* Irritatingly, AMD extended registers use a different naming convention
   from the normal registers. */
7062 if (REX_INT_REG_P (x))
7069 error ("extended registers have no high halves");
7072 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7075 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7078 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7081 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7084 error ("unsupported operand size for extended register");
7092 if (STACK_TOP_P (x))
7094 fputs ("st(0)", file);
7101 if (! ANY_FP_REG_P (x))
7102 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7107 fputs (hi_reg_name[REGNO (x)], file);
7110 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7112 fputs (qi_reg_name[REGNO (x)], file);
7115 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7117 fputs (qi_high_reg_name[REGNO (x)], file);
7124 /* Locate some local-dynamic symbol still in use by this function
so that we can print its name in some tls_local_dynamic_base pattern. */
7129 get_some_local_dynamic_name (void)
7133 if (cfun->machine->some_ld_name)
7134 return cfun->machine->some_ld_name;
7136 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7138 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7139 return cfun->machine->some_ld_name;
7145 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7149 if (GET_CODE (x) == SYMBOL_REF
7150 && local_dynamic_symbolic_operand (x, Pmode))
7152 cfun->machine->some_ld_name = XSTR (x, 0);
7160 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7161 C -- print opcode suffix for set/cmov insn.
7162 c -- like C, but print reversed condition
7163 F,f -- likewise, but for floating-point.
7164 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7166 R -- print the prefix for register names.
7167 z -- print the opcode suffix for the size of the current operand.
7168 * -- print a star (in certain assembler syntax)
7169 A -- print an absolute memory reference.
7170 w -- print the operand as if it's a "word" (HImode) even if it isn't.
s -- print a shift double count, followed by the assembler's argument
delimiter.
7173 b -- print the QImode name of the register for the indicated operand.
7174 %b0 would print %al if operands[0] is reg 0.
7175 w -- likewise, print the HImode name of the register.
7176 k -- likewise, print the SImode name of the register.
7177 q -- likewise, print the DImode name of the register.
7178 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7179 y -- print "st(0)" instead of "st" as a register.
7180 D -- print condition for SSE cmp instruction.
7181 P -- if PIC, print an @PLT suffix.
7182 X -- don't print any sort of PIC '@' suffix for a symbol.
7183 & -- print some in-use local-dynamic symbol name.
7187 print_operand (FILE *file, rtx x, int code)
7194 if (ASSEMBLER_DIALECT == ASM_ATT)
7199 assemble_name (file, get_some_local_dynamic_name ());
7203 if (ASSEMBLER_DIALECT == ASM_ATT)
7205 else if (ASSEMBLER_DIALECT == ASM_INTEL)
7207 /* Intel syntax. For absolute addresses, registers should not
7208 be surrounded by braces. */
7209 if (GET_CODE (x) != REG)
7212 PRINT_OPERAND (file, x, 0);
7220 PRINT_OPERAND (file, x, 0);
7225 if (ASSEMBLER_DIALECT == ASM_ATT)
7230 if (ASSEMBLER_DIALECT == ASM_ATT)
7235 if (ASSEMBLER_DIALECT == ASM_ATT)
7240 if (ASSEMBLER_DIALECT == ASM_ATT)
7245 if (ASSEMBLER_DIALECT == ASM_ATT)
7250 if (ASSEMBLER_DIALECT == ASM_ATT)
/* 387 opcodes don't get size suffixes if the operands are registers. */
7257 if (STACK_REG_P (x))
7260 /* Likewise if using Intel opcodes. */
7261 if (ASSEMBLER_DIALECT == ASM_INTEL)
/* Derive the size of the op from the size of the operand. */
7265 switch (GET_MODE_SIZE (GET_MODE (x)))
7268 #ifdef HAVE_GAS_FILDS_FISTS
7274 if (GET_MODE (x) == SFmode)
7289 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7291 #ifdef GAS_MNEMONICS
7317 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7319 PRINT_OPERAND (file, x, 0);
/* A little bit of braindamage here. The SSE compare instructions
   use completely different names for the comparisons than the
   fp conditional moves do. */
7328 switch (GET_CODE (x))
7343 fputs ("unord", file);
7347 fputs ("neq", file);
7351 fputs ("nlt", file);
7355 fputs ("nle", file);
7358 fputs ("ord", file);
7366 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7367 if (ASSEMBLER_DIALECT == ASM_ATT)
7369 switch (GET_MODE (x))
7371 case HImode: putc ('w', file); break;
7373 case SFmode: putc ('l', file); break;
7375 case DFmode: putc ('q', file); break;
7383 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7386 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7387 if (ASSEMBLER_DIALECT == ASM_ATT)
7390 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7393 /* Like above, but reverse condition */
7395 /* Check to see if argument to %c is really a constant
7396 and not a condition code which needs to be reversed. */
7397 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
7399 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7402 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7405 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7406 if (ASSEMBLER_DIALECT == ASM_ATT)
7409 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7415 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7418 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7421 int pred_val = INTVAL (XEXP (x, 0));
7423 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7424 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7426 int taken = pred_val > REG_BR_PROB_BASE / 2;
7427 int cputaken = final_forward_branch_p (current_output_insn) == 0;
/* Emit hints only when the default branch prediction
   heuristics would fail. */
7431 if (taken != cputaken)
7433 /* We use 3e (DS) prefix for taken branches and
7434 2e (CS) prefix for not taken branches. */
7436 fputs ("ds ; ", file);
7438 fputs ("cs ; ", file);
7445 output_operand_lossage ("invalid operand code `%c'", code);
7449 if (GET_CODE (x) == REG)
7450 print_reg (x, code, file);
7452 else if (GET_CODE (x) == MEM)
7454 /* No `byte ptr' prefix for call instructions. */
7455 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7458 switch (GET_MODE_SIZE (GET_MODE (x)))
7460 case 1: size = "BYTE"; break;
7461 case 2: size = "WORD"; break;
7462 case 4: size = "DWORD"; break;
7463 case 8: size = "QWORD"; break;
7464 case 12: size = "XWORD"; break;
7465 case 16: size = "XMMWORD"; break;
7470 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7473 else if (code == 'w')
7475 else if (code == 'k')
7479 fputs (" PTR ", file);
7483 /* Avoid (%rip) for call operands. */
7484 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7485 && GET_CODE (x) != CONST_INT)
7486 output_addr_const (file, x);
7487 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7488 output_operand_lossage ("invalid constraints for operand");
7493 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7498 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7499 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7501 if (ASSEMBLER_DIALECT == ASM_ATT)
7503 fprintf (file, "0x%08lx", l);
7506 /* These float cases don't actually occur as immediate operands. */
7507 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7511 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7512 fprintf (file, "%s", dstr);
7515 else if (GET_CODE (x) == CONST_DOUBLE
7516 && GET_MODE (x) == XFmode)
7520 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7521 fprintf (file, "%s", dstr);
7528 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7530 if (ASSEMBLER_DIALECT == ASM_ATT)
7533 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7534 || GET_CODE (x) == LABEL_REF)
7536 if (ASSEMBLER_DIALECT == ASM_ATT)
7539 fputs ("OFFSET FLAT:", file);
7542 if (GET_CODE (x) == CONST_INT)
7543 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7545 output_pic_addr_const (file, x, code);
7547 output_addr_const (file, x);
7551 /* Print a memory operand whose address is ADDR. */
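/* For example, a base+index+displacement address is printed as
   "-8(%ebp,%eax,4)" in AT&T syntax and as "[ebp+eax*4-8]" in Intel
   syntax (the register choices here are only illustrative). */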
7554 print_operand_address (FILE *file, rtx addr)
7556 struct ix86_address parts;
7557 rtx base, index, disp;
7560 if (! ix86_decompose_address (addr, &parts))
7564 index = parts.index;
7566 scale = parts.scale;
7574 if (USER_LABEL_PREFIX[0] == 0)
7576 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
7582 if (!base && !index)
/* Displacement-only addresses require special attention. */
7586 if (GET_CODE (disp) == CONST_INT)
7588 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
7590 if (USER_LABEL_PREFIX[0] == 0)
7592 fputs ("ds:", file);
7594 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
7597 output_pic_addr_const (file, disp, 0);
7599 output_addr_const (file, disp);
/* Use the one-byte-shorter RIP-relative addressing for 64-bit mode. */
7603 && ((GET_CODE (disp) == SYMBOL_REF
7604 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
7605 || GET_CODE (disp) == LABEL_REF
7606 || (GET_CODE (disp) == CONST
7607 && GET_CODE (XEXP (disp, 0)) == PLUS
7608 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
7609 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
7610 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
7611 fputs ("(%rip)", file);
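/* e.g. a reference to the symbol foo prints as "foo(%rip)" rather
   than as an absolute "foo". */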
7615 if (ASSEMBLER_DIALECT == ASM_ATT)
7620 output_pic_addr_const (file, disp, 0);
7621 else if (GET_CODE (disp) == LABEL_REF)
7622 output_asm_label (disp);
7624 output_addr_const (file, disp);
7629 print_reg (base, 0, file);
7633 print_reg (index, 0, file);
7635 fprintf (file, ",%d", scale);
7641 rtx offset = NULL_RTX;
7645 /* Pull out the offset of a symbol; print any symbol itself. */
7646 if (GET_CODE (disp) == CONST
7647 && GET_CODE (XEXP (disp, 0)) == PLUS
7648 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7650 offset = XEXP (XEXP (disp, 0), 1);
7651 disp = gen_rtx_CONST (VOIDmode,
7652 XEXP (XEXP (disp, 0), 0));
7656 output_pic_addr_const (file, disp, 0);
7657 else if (GET_CODE (disp) == LABEL_REF)
7658 output_asm_label (disp);
7659 else if (GET_CODE (disp) == CONST_INT)
7662 output_addr_const (file, disp);
7668 print_reg (base, 0, file);
7671 if (INTVAL (offset) >= 0)
7673 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7677 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7684 print_reg (index, 0, file);
7686 fprintf (file, "*%d", scale);
7694 output_addr_const_extra (FILE *file, rtx x)
7698 if (GET_CODE (x) != UNSPEC)
7701 op = XVECEXP (x, 0, 0);
7702 switch (XINT (x, 1))
7704 case UNSPEC_GOTTPOFF:
7705 output_addr_const (file, op);
7706 /* FIXME: This might be @TPOFF in Sun ld. */
7707 fputs ("@GOTTPOFF", file);
7710 output_addr_const (file, op);
7711 fputs ("@TPOFF", file);
7714 output_addr_const (file, op);
7716 fputs ("@TPOFF", file);
7718 fputs ("@NTPOFF", file);
7721 output_addr_const (file, op);
7722 fputs ("@DTPOFF", file);
7724 case UNSPEC_GOTNTPOFF:
7725 output_addr_const (file, op);
7727 fputs ("@GOTTPOFF(%rip)", file);
7729 fputs ("@GOTNTPOFF", file);
7731 case UNSPEC_INDNTPOFF:
7732 output_addr_const (file, op);
7733 fputs ("@INDNTPOFF", file);
7743 /* Split one or more DImode RTL references into pairs of SImode
7744 references. The RTL can be REG, offsettable MEM, integer constant, or
7745 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7746 split and "num" is its length. lo_half and hi_half are output arrays
7747 that parallel "operands". */
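/* A small illustration: a DImode pseudo splits into
   (subreg:SI (reg:DI N) 0) and (subreg:SI (reg:DI N) 4), while an
   offsettable MEM splits into the same MEM adjusted by 0 and 4 bytes;
   on this little-endian target offset 0 is the low half. */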
7750 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7754 rtx op = operands[num];
/* simplify_subreg refuses to split volatile memory addresses,
   but we still have to handle them. */
7758 if (GET_CODE (op) == MEM)
7760 lo_half[num] = adjust_address (op, SImode, 0);
7761 hi_half[num] = adjust_address (op, SImode, 4);
7765 lo_half[num] = simplify_gen_subreg (SImode, op,
7766 GET_MODE (op) == VOIDmode
7767 ? DImode : GET_MODE (op), 0);
7768 hi_half[num] = simplify_gen_subreg (SImode, op,
7769 GET_MODE (op) == VOIDmode
7770 ? DImode : GET_MODE (op), 4);
/* Split one or more TImode RTL references into pairs of DImode
   references. The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
   split and "num" is its length. lo_half and hi_half are output arrays
   that parallel "operands". */
7781 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7785 rtx op = operands[num];
/* simplify_subreg refuses to split volatile memory addresses, but we
   still have to handle them. */
7789 if (GET_CODE (op) == MEM)
7791 lo_half[num] = adjust_address (op, DImode, 0);
7792 hi_half[num] = adjust_address (op, DImode, 8);
7796 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7797 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7802 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7803 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7804 is the expression of the binary operation. The output may either be
7805 emitted here, or returned to the caller, like all output_* functions.
7807 There is no guarantee that the operands are the same mode, as they
7808 might be within FLOAT or FLOAT_EXTEND expressions. */
7810 #ifndef SYSV386_COMPAT
7811 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7812 wants to fix the assemblers because that causes incompatibility
7813 with gcc. No-one wants to fix gcc because that causes
7814 incompatibility with assemblers... You can use the option of
7815 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7816 #define SYSV386_COMPAT 1
7820 output_387_binary_op (rtx insn, rtx *operands)
7822 static char buf[30];
7825 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7827 #ifdef ENABLE_CHECKING
/* Even if we do not want to check the inputs, this documents the input
   constraints, which helps in understanding the following code. */
7830 if (STACK_REG_P (operands[0])
7831 && ((REG_P (operands[1])
7832 && REGNO (operands[0]) == REGNO (operands[1])
7833 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7834 || (REG_P (operands[2])
7835 && REGNO (operands[0]) == REGNO (operands[2])
7836 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7837 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7843 switch (GET_CODE (operands[3]))
7846 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7847 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7855 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7856 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7864 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7865 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7873 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7874 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7888 if (GET_MODE (operands[0]) == SFmode)
7889 strcat (buf, "ss\t{%2, %0|%0, %2}");
7891 strcat (buf, "sd\t{%2, %0|%0, %2}");
7896 switch (GET_CODE (operands[3]))
7900 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7902 rtx temp = operands[2];
7903 operands[2] = operands[1];
/* We know operands[0] == operands[1]. */
7909 if (GET_CODE (operands[2]) == MEM)
7915 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7917 if (STACK_TOP_P (operands[0]))
7918 /* How is it that we are storing to a dead operand[2]?
7919 Well, presumably operands[1] is dead too. We can't
7920 store the result to st(0) as st(0) gets popped on this
7921 instruction. Instead store to operands[2] (which I
7922 think has to be st(1)). st(1) will be popped later.
7923 gcc <= 2.8.1 didn't have this check and generated
7924 assembly code that the Unixware assembler rejected. */
7925 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7927 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7931 if (STACK_TOP_P (operands[0]))
7932 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7934 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7939 if (GET_CODE (operands[1]) == MEM)
7945 if (GET_CODE (operands[2]) == MEM)
7951 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7954 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7955 derived assemblers, confusingly reverse the direction of
7956 the operation for fsub{r} and fdiv{r} when the
7957 destination register is not st(0). The Intel assembler
7958 doesn't have this brain damage. Read !SYSV386_COMPAT to
7959 figure out what the hardware really does. */
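/* Concretely: when the destination is not st(0), the `r' suffix we
   must emit for such AT&T-derived assemblers is the opposite of the
   one Intel syntax uses for the same hardware operation; the
   {att|intel} alternations in the templates below encode that swap. */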
7960 if (STACK_TOP_P (operands[0]))
7961 p = "{p\t%0, %2|rp\t%2, %0}";
7963 p = "{rp\t%2, %0|p\t%0, %2}";
7965 if (STACK_TOP_P (operands[0]))
7966 /* As above for fmul/fadd, we can't store to st(0). */
7967 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7969 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7974 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7977 if (STACK_TOP_P (operands[0]))
7978 p = "{rp\t%0, %1|p\t%1, %0}";
7980 p = "{p\t%1, %0|rp\t%0, %1}";
7982 if (STACK_TOP_P (operands[0]))
7983 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7985 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7990 if (STACK_TOP_P (operands[0]))
7992 if (STACK_TOP_P (operands[1]))
7993 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7995 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7998 else if (STACK_TOP_P (operands[1]))
8001 p = "{\t%1, %0|r\t%0, %1}";
8003 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8009 p = "{r\t%2, %0|\t%0, %2}";
8011 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
/* Output code to initialize the control word copies used by the
   trunc?f?i patterns. NORMAL is set to the current control word, while
   ROUND_DOWN is set to a control word that truncates (rounds toward
   zero). */
8028 emit_i387_cw_initialization (rtx normal, rtx round_down)
8030 rtx reg = gen_reg_rtx (HImode);
8032 emit_insn (gen_x86_fnstcw_1 (normal));
8033 emit_move_insn (reg, normal);
8034 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
8036 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8038 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
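/* 0xc00 sets both rounding-control bits (bits 10 and 11) of the i387
   control word, i.e. RC = 11b, round toward zero, which is what the
   fist instructions need to implement C-style truncation. */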
8039 emit_move_insn (round_down, reg);
8042 /* Output code for INSN to convert a float to a signed int. OPERANDS
8043 are the insn operands. The output may be [HSD]Imode and the input
8044 operand may be [SDX]Fmode. */
8047 output_fix_trunc (rtx insn, rtx *operands)
8049 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8050 int dimode_p = GET_MODE (operands[0]) == DImode;
8052 /* Jump through a hoop or two for DImode, since the hardware has no
8053 non-popping instruction. We used to do this a different way, but
8054 that was somewhat fragile and broke with post-reload splitters. */
8055 if (dimode_p && !stack_top_dies)
8056 output_asm_insn ("fld\t%y1", operands);
8058 if (!STACK_TOP_P (operands[1]))
8061 if (GET_CODE (operands[0]) != MEM)
8064 output_asm_insn ("fldcw\t%3", operands);
8065 if (stack_top_dies || dimode_p)
8066 output_asm_insn ("fistp%z0\t%0", operands);
8068 output_asm_insn ("fist%z0\t%0", operands);
8069 output_asm_insn ("fldcw\t%2", operands);
8074 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8075 should be used and 2 when fnstsw should be used. UNORDERED_P is true
8076 when fucom should be used. */
8079 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8082 rtx cmp_op0 = operands[0];
8083 rtx cmp_op1 = operands[1];
8084 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
8089 cmp_op1 = operands[2];
8093 if (GET_MODE (operands[0]) == SFmode)
8095 return "ucomiss\t{%1, %0|%0, %1}";
8097 return "comiss\t{%1, %0|%0, %1}";
8100 return "ucomisd\t{%1, %0|%0, %1}";
8102 return "comisd\t{%1, %0|%0, %1}";
8105 if (! STACK_TOP_P (cmp_op0))
8108 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8110 if (STACK_REG_P (cmp_op1)
8112 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8113 && REGNO (cmp_op1) != FIRST_STACK_REG)
/* If the top of the 387 stack dies, and the other operand is also
   a stack register that dies, then this must be a `fcompp' float
   compare. */
8121 /* There is no double popping fcomi variant. Fortunately,
8122 eflags is immune from the fstp's cc clobbering. */
8124 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8126 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8134 return "fucompp\n\tfnstsw\t%0";
8136 return "fcompp\n\tfnstsw\t%0";
8149 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8151 static const char * const alt[24] =
8163 "fcomi\t{%y1, %0|%0, %y1}",
8164 "fcomip\t{%y1, %0|%0, %y1}",
8165 "fucomi\t{%y1, %0|%0, %y1}",
8166 "fucomip\t{%y1, %0|%0, %y1}",
8173 "fcom%z2\t%y2\n\tfnstsw\t%0",
8174 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8175 "fucom%z2\t%y2\n\tfnstsw\t%0",
8176 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8178 "ficom%z2\t%y2\n\tfnstsw\t%0",
8179 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8187 mask = eflags_p << 3;
8188 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
8189 mask |= unordered_p << 1;
8190 mask |= stack_top_dies;
8203 ix86_output_addr_vec_elt (FILE *file, int value)
8205 const char *directive = ASM_LONG;
8210 directive = ASM_QUAD;
8216 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8220 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8223 fprintf (file, "%s%s%d-%s%d\n",
8224 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8225 else if (HAVE_AS_GOTOFF_IN_DATA)
8226 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8228 else if (TARGET_MACHO)
8230 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8231 machopic_output_function_base_name (file);
8232 fprintf(file, "\n");
8236 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8237 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
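/* So a 32-bit PIC jump-table entry typically comes out as
   ".long .L<N>@GOTOFF" (assuming the usual ELF ".L" local label
   prefix), which keeps the table position-independent. */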
8240 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8244 ix86_expand_clear (rtx dest)
8248 /* We play register width games, which are only valid after reload. */
8249 if (!reload_completed)
8252 /* Avoid HImode and its attendant prefix byte. */
8253 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8254 dest = gen_rtx_REG (SImode, REGNO (dest));
8256 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8258 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8259 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8261 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8262 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8268 /* X is an unchanging MEM. If it is a constant pool reference, return
8269 the constant pool rtx, else NULL. */
8272 maybe_get_pool_constant (rtx x)
8274 x = ix86_delegitimize_address (XEXP (x, 0));
8276 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8277 return get_pool_constant (x);
8283 ix86_expand_move (enum machine_mode mode, rtx operands[])
8285 int strict = (reload_in_progress || reload_completed);
8287 enum tls_model model;
8292 model = tls_symbolic_operand (op1, Pmode);
8295 op1 = legitimize_tls_address (op1, model, true);
8296 op1 = force_operand (op1, op0);
8301 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8306 rtx temp = ((reload_in_progress
8307 || ((op0 && GET_CODE (op0) == REG)
8309 ? op0 : gen_reg_rtx (Pmode));
8310 op1 = machopic_indirect_data_reference (op1, temp);
8311 op1 = machopic_legitimize_pic_address (op1, mode,
8312 temp == op1 ? 0 : temp);
8314 else if (MACHOPIC_INDIRECT)
8315 op1 = machopic_indirect_data_reference (op1, 0);
8319 if (GET_CODE (op0) == MEM)
8320 op1 = force_reg (Pmode, op1);
8324 if (GET_CODE (temp) != REG)
8325 temp = gen_reg_rtx (Pmode);
8326 temp = legitimize_pic_address (op1, temp);
8331 #endif /* TARGET_MACHO */
8335 if (GET_CODE (op0) == MEM
8336 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8337 || !push_operand (op0, mode))
8338 && GET_CODE (op1) == MEM)
8339 op1 = force_reg (mode, op1);
8341 if (push_operand (op0, mode)
8342 && ! general_no_elim_operand (op1, mode))
8343 op1 = copy_to_mode_reg (mode, op1);
/* Force large constants in 64-bit compilation into a register
   so that they get CSEd. */
8347 if (TARGET_64BIT && mode == DImode
8348 && immediate_operand (op1, mode)
8349 && !x86_64_zero_extended_value (op1)
8350 && !register_operand (op0, mode)
8351 && optimize && !reload_completed && !reload_in_progress)
8352 op1 = copy_to_mode_reg (mode, op1);
8354 if (FLOAT_MODE_P (mode))
8356 /* If we are loading a floating point constant to a register,
8357 force the value to memory now, since we'll get better code
out of the back end. */
8362 else if (GET_CODE (op1) == CONST_DOUBLE)
8364 op1 = validize_mem (force_const_mem (mode, op1));
8365 if (!register_operand (op0, mode))
8367 rtx temp = gen_reg_rtx (mode);
8368 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8369 emit_move_insn (op0, temp);
8376 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
8380 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
/* Force constants other than zero into memory. We do not know how
   the instructions used to build constants modify the upper 64 bits
   of the register; once we have that information, we may be able
   to handle some of them more efficiently. */
8386 if ((reload_in_progress | reload_completed) == 0
8387 && register_operand (operands[0], mode)
8388 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
8389 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
8391 /* Make operand1 a register if it isn't already. */
8393 && !register_operand (operands[0], mode)
8394 && !register_operand (operands[1], mode))
8396 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8397 emit_move_insn (operands[0], temp);
8401 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
/* Attempt to expand a binary operator. Make the expansion closer to the
   actual machine than just general_operand, which would allow 3 separate
   memory references (one output, two input) in a single insn. */
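/* e.g. for "x += y" with both x and y in memory, one operand must be
   copied into a register first, since no "op mem, mem" form exists;
   the matching-memory logic below keeps "dst = dst OP src" forms
   intact so they can still use a memory destination. */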
8409 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
8412 int matching_memory;
8413 rtx src1, src2, dst, op, clob;
8419 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8420 if (GET_RTX_CLASS (code) == 'c'
8421 && (rtx_equal_p (dst, src2)
8422 || immediate_operand (src1, mode)))
8429 /* If the destination is memory, and we do not have matching source
8430 operands, do things in registers. */
8431 matching_memory = 0;
8432 if (GET_CODE (dst) == MEM)
8434 if (rtx_equal_p (dst, src1))
8435 matching_memory = 1;
8436 else if (GET_RTX_CLASS (code) == 'c'
8437 && rtx_equal_p (dst, src2))
8438 matching_memory = 2;
8440 dst = gen_reg_rtx (mode);
8443 /* Both source operands cannot be in memory. */
8444 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8446 if (matching_memory != 2)
8447 src2 = force_reg (mode, src2);
8449 src1 = force_reg (mode, src1);
/* If the operation is not commutative, source 1 cannot be a constant
   or non-matching memory. */
8454 if ((CONSTANT_P (src1)
8455 || (!matching_memory && GET_CODE (src1) == MEM))
8456 && GET_RTX_CLASS (code) != 'c')
8457 src1 = force_reg (mode, src1);
/* If optimizing, copy to regs to improve CSE. */
8460 if (optimize && ! no_new_pseudos)
8462 if (GET_CODE (dst) == MEM)
8463 dst = gen_reg_rtx (mode);
8464 if (GET_CODE (src1) == MEM)
8465 src1 = force_reg (mode, src1);
8466 if (GET_CODE (src2) == MEM)
8467 src2 = force_reg (mode, src2);
8470 /* Emit the instruction. */
8472 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8473 if (reload_in_progress)
8475 /* Reload doesn't know about the flags register, and doesn't know that
8476 it doesn't want to clobber it. We can only do this with PLUS. */
8483 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8484 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8487 /* Fix up the destination if needed. */
8488 if (dst != operands[0])
8489 emit_move_insn (operands[0], dst);
8492 /* Return TRUE or FALSE depending on whether the binary operator meets the
8493 appropriate constraints. */
8496 ix86_binary_operator_ok (enum rtx_code code,
8497 enum machine_mode mode ATTRIBUTE_UNUSED,
8500 /* Both source operands cannot be in memory. */
8501 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
/* If the operation is not commutative, source 1 cannot be a constant. */
8504 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
8506 /* If the destination is memory, we must have a matching source operand. */
8507 if (GET_CODE (operands[0]) == MEM
8508 && ! (rtx_equal_p (operands[0], operands[1])
8509 || (GET_RTX_CLASS (code) == 'c'
8510 && rtx_equal_p (operands[0], operands[2]))))
/* If the operation is not commutative and source 1 is memory, we must
   have a matching destination. */
8514 if (GET_CODE (operands[1]) == MEM
8515 && GET_RTX_CLASS (code) != 'c'
8516 && ! rtx_equal_p (operands[0], operands[1]))
/* Attempt to expand a unary operator. Make the expansion closer to the
   actual machine than just general_operand, which would allow 2 separate
   memory references (one output, one input) in a single insn. */
8526 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
8529 int matching_memory;
8530 rtx src, dst, op, clob;
8535 /* If the destination is memory, and we do not have matching source
8536 operands, do things in registers. */
8537 matching_memory = 0;
8538 if (GET_CODE (dst) == MEM)
8540 if (rtx_equal_p (dst, src))
8541 matching_memory = 1;
8543 dst = gen_reg_rtx (mode);
8546 /* When source operand is memory, destination must match. */
8547 if (!matching_memory && GET_CODE (src) == MEM)
8548 src = force_reg (mode, src);
/* If optimizing, copy to regs to improve CSE. */
8551 if (optimize && ! no_new_pseudos)
8553 if (GET_CODE (dst) == MEM)
8554 dst = gen_reg_rtx (mode);
8555 if (GET_CODE (src) == MEM)
8556 src = force_reg (mode, src);
8559 /* Emit the instruction. */
8561 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8562 if (reload_in_progress || code == NOT)
8564 /* Reload doesn't know about the flags register, and doesn't know that
8565 it doesn't want to clobber it. */
8572 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8573 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8576 /* Fix up the destination if needed. */
8577 if (dst != operands[0])
8578 emit_move_insn (operands[0], dst);
8581 /* Return TRUE or FALSE depending on whether the unary operator meets the
8582 appropriate constraints. */
8585 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8586 enum machine_mode mode ATTRIBUTE_UNUSED,
8587 rtx operands[2] ATTRIBUTE_UNUSED)
8589 /* If one of operands is memory, source and destination must match. */
8590 if ((GET_CODE (operands[0]) == MEM
8591 || GET_CODE (operands[1]) == MEM)
8592 && ! rtx_equal_p (operands[0], operands[1]))
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and whether the
   CC mode is at least as constrained as REQ_MODE. */
8602 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
8605 enum machine_mode set_mode;
8607 set = PATTERN (insn);
8608 if (GET_CODE (set) == PARALLEL)
8609 set = XVECEXP (set, 0, 0);
8610 if (GET_CODE (set) != SET)
8612 if (GET_CODE (SET_SRC (set)) != COMPARE)
8615 set_mode = GET_MODE (SET_DEST (set));
8619 if (req_mode != CCNOmode
8620 && (req_mode != CCmode
8621 || XEXP (SET_SRC (set), 1) != const0_rtx))
8625 if (req_mode == CCGCmode)
8629 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8633 if (req_mode == CCZmode)
8643 return (GET_MODE (SET_SRC (set)) == set_mode);
8646 /* Generate insn patterns to do an integer compare of OPERANDS. */
8649 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
8651 enum machine_mode cmpmode;
8654 cmpmode = SELECT_CC_MODE (code, op0, op1);
8655 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8657 /* This is very simple, but making the interface the same as in the
8658 FP case makes the rest of the code easier. */
8659 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8660 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8662 /* Return the test that should be put into the flags user, i.e.
8663 the bcc, scc, or cmov instruction. */
8664 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
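/* The caller then plugs this into e.g.
   (set (pc) (if_then_else (gt (reg:CCGC 17) (const_int 0)) ...)),
   register 17 being the flags register in this port. */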
8667 /* Figure out whether to use ordered or unordered fp comparisons.
8668 Return the appropriate mode to use. */
8671 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
/* ??? In order to make all comparisons reversible, we do all comparisons
   non-trapping when compiling for IEEE. Once gcc is able to distinguish
   all forms of trapping and nontrapping comparisons, we can make inequality
   comparisons trapping again, since that results in better code when using
   FCOM-based compares. */
8678 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8682 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
8684 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8685 return ix86_fp_compare_mode (code);
8688 /* Only zero flag is needed. */
8690 case NE: /* ZF!=0 */
8692 /* Codes needing carry flag. */
8693 case GEU: /* CF=0 */
8694 case GTU: /* CF=0 & ZF=0 */
8695 case LTU: /* CF=1 */
8696 case LEU: /* CF=1 | ZF=1 */
8698 /* Codes possibly doable only with sign flag when
8699 comparing against zero. */
8700 case GE: /* SF=OF or SF=0 */
8701 case LT: /* SF<>OF or SF=1 */
8702 if (op1 == const0_rtx)
/* For the other cases the carry flag is not required. */
/* Codes doable only with the sign flag when comparing
   against zero, but for which we lack a jump instruction,
   so we need to use relational tests against overflow,
   which thus needs to be zero. */
8711 case GT: /* ZF=0 & SF=OF */
8712 case LE: /* ZF=1 | SF<>OF */
8713 if (op1 == const0_rtx)
/* The strcmp pattern does (use flags), and combine may ask us for a proper
comparison. */
8726 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8729 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
8731 enum rtx_code swapped_code = swap_condition (code);
8732 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8733 || (ix86_fp_comparison_cost (swapped_code)
8734 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8737 /* Swap, force into registers, or otherwise massage the two operands
8738 to a fp comparison. The operands are updated in place; the new
8739 comparison code is returned. */
8741 static enum rtx_code
8742 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
8744 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8745 rtx op0 = *pop0, op1 = *pop1;
8746 enum machine_mode op_mode = GET_MODE (op0);
8747 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
/* All of the unordered compare instructions work only on registers.
   The same is true of the XFmode compare instructions and of the
   fcomi compare instructions. */
8754 && (fpcmp_mode == CCFPUmode
8755 || op_mode == XFmode
8756 || ix86_use_fcomi_compare (code)))
8758 op0 = force_reg (op_mode, op0);
8759 op1 = force_reg (op_mode, op1);
8763 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
things around if they appear profitable, otherwise force op0
into a register. */
8767 if (standard_80387_constant_p (op0) == 0
8768 || (GET_CODE (op0) == MEM
8769 && ! (standard_80387_constant_p (op1) == 0
8770 || GET_CODE (op1) == MEM)))
8773 tmp = op0, op0 = op1, op1 = tmp;
8774 code = swap_condition (code);
8777 if (GET_CODE (op0) != REG)
8778 op0 = force_reg (op_mode, op0);
8780 if (CONSTANT_P (op1))
8782 if (standard_80387_constant_p (op1))
8783 op1 = force_reg (op_mode, op1);
8785 op1 = validize_mem (force_const_mem (op_mode, op1));
8789 /* Try to rearrange the comparison to make it cheaper. */
8790 if (ix86_fp_comparison_cost (code)
8791 > ix86_fp_comparison_cost (swap_condition (code))
8792 && (GET_CODE (op1) == REG || !no_new_pseudos))
8795 tmp = op0, op0 = op1, op1 = tmp;
8796 code = swap_condition (code);
8797 if (GET_CODE (op0) != REG)
8798 op0 = force_reg (op_mode, op0);
/* Convert the comparison codes we use to represent FP comparisons to the
   integer code that will result in a proper branch. Return UNKNOWN if no
   such code is available. */
8809 static enum rtx_code
8810 ix86_fp_compare_code_to_integer (enum rtx_code code)
/* Split comparison code CODE into comparisons we can do using branch
   instructions. BYPASS_CODE is the comparison code for the branch that
   will branch around FIRST_CODE and SECOND_CODE. If one of the branches
   is not required, its code is set to NIL.
   We never require more than two branches. */
8845 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
8846 enum rtx_code *first_code,
8847 enum rtx_code *second_code)
8853 /* The fcomi comparison sets flags as follows:
8863 case GT: /* GTU - CF=0 & ZF=0 */
8864 case GE: /* GEU - CF=0 */
8865 case ORDERED: /* PF=0 */
8866 case UNORDERED: /* PF=1 */
8867 case UNEQ: /* EQ - ZF=1 */
8868 case UNLT: /* LTU - CF=1 */
8869 case UNLE: /* LEU - CF=1 | ZF=1 */
8870 case LTGT: /* EQ - ZF=0 */
8872 case LT: /* LTU - CF=1 - fails on unordered */
8874 *bypass_code = UNORDERED;
8876 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8878 *bypass_code = UNORDERED;
8880 case EQ: /* EQ - ZF=1 - fails on unordered */
8882 *bypass_code = UNORDERED;
8884 case NE: /* NE - ZF=0 - fails on unordered */
8886 *second_code = UNORDERED;
8888 case UNGE: /* GEU - CF=0 - fails on unordered */
8890 *second_code = UNORDERED;
8892 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8894 *second_code = UNORDERED;
8899 if (!TARGET_IEEE_FP)
/* Return the cost of a comparison done with fcom + arithmetic operations
   on AX. All the following functions use the number of instructions as
   a cost metric. In the future this should be tweaked to compute bytes
   for optimize_size and to take into account the performance of various
   instructions on various CPUs. */
8911 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
8913 if (!TARGET_IEEE_FP)
8915 /* The cost of code output by ix86_expand_fp_compare. */
8943 /* Return cost of comparison done using fcomi operation.
8944 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8946 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
8948 enum rtx_code bypass_code, first_code, second_code;
/* Return an arbitrarily high cost when the instruction is not supported;
   this prevents gcc from using it. */
8953 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8954 return (bypass_code != NIL || second_code != NIL) + 2;
8957 /* Return cost of comparison done using sahf operation.
8958 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8960 ix86_fp_comparison_sahf_cost (enum rtx_code code)
8962 enum rtx_code bypass_code, first_code, second_code;
/* Return an arbitrarily high cost when the instruction is not preferred;
   this prevents gcc from using it. */
8965 if (!TARGET_USE_SAHF && !optimize_size)
8967 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8968 return (bypass_code != NIL || second_code != NIL) + 3;
8971 /* Compute cost of the comparison done using any method.
8972 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8974 ix86_fp_comparison_cost (enum rtx_code code)
8976 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8979 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8980 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8982 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8983 if (min > sahf_cost)
8985 if (min > fcomi_cost)
8990 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8993 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
8994 rtx *second_test, rtx *bypass_test)
8996 enum machine_mode fpcmp_mode, intcmp_mode;
8998 int cost = ix86_fp_comparison_cost (code);
8999 enum rtx_code bypass_code, first_code, second_code;
9001 fpcmp_mode = ix86_fp_compare_mode (code);
9002 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
9005 *second_test = NULL_RTX;
9007 *bypass_test = NULL_RTX;
9009 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9011 /* Do fcomi/sahf based test when profitable. */
9012 if ((bypass_code == NIL || bypass_test)
9013 && (second_code == NIL || second_test)
9014 && ix86_fp_comparison_arithmetics_cost (code) > cost)
9018 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9019 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
9025 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9026 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9028 scratch = gen_reg_rtx (HImode);
9029 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9030 emit_insn (gen_x86_sahf_1 (scratch));
9033 /* The FP codes work out to act like unsigned. */
9034 intcmp_mode = fpcmp_mode;
9036 if (bypass_code != NIL)
9037 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
9038 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9040 if (second_code != NIL)
9041 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
9042 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9047 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
9048 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9049 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9051 scratch = gen_reg_rtx (HImode);
9052 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9054 /* In the unordered case, we have to check C2 for NaN's, which
9055 doesn't happen to work out to anything nice combination-wise.
9056 So do some bit twiddling on the value we've got in AH to come
9057 up with an appropriate set of condition codes. */
9059 intcmp_mode = CCNOmode;
9064 if (code == GT || !TARGET_IEEE_FP)
9066 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9071 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9072 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9073 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
9074 intcmp_mode = CCmode;
9080 if (code == LT && TARGET_IEEE_FP)
9082 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9083 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
9084 intcmp_mode = CCmode;
9089 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
9095 if (code == GE || !TARGET_IEEE_FP)
9097 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
9102 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9103 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9110 if (code == LE && TARGET_IEEE_FP)
9112 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9113 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9114 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9115 intcmp_mode = CCmode;
9120 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9126 if (code == EQ && TARGET_IEEE_FP)
9128 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9129 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9130 intcmp_mode = CCmode;
9135 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9142 if (code == NE && TARGET_IEEE_FP)
9144 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9145 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9151 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9157 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9161 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9170 /* Return the test that should be put into the flags user, i.e.
9171 the bcc, scc, or cmov instruction. */
9172 return gen_rtx_fmt_ee (code, VOIDmode,
9173 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9178 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
9181 op0 = ix86_compare_op0;
9182 op1 = ix86_compare_op1;
9185 *second_test = NULL_RTX;
9187 *bypass_test = NULL_RTX;
9189 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9190 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9191 second_test, bypass_test);
9193 ret = ix86_expand_int_compare (code, op0, op1);
9198 /* Return true if the CODE will result in a nontrivial jump sequence. */
9200 ix86_fp_jump_nontrivial_p (enum rtx_code code)
9202 enum rtx_code bypass_code, first_code, second_code;
9205 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9206 return bypass_code != NIL || second_code != NIL;
9210 ix86_expand_branch (enum rtx_code code, rtx label)
9214 switch (GET_MODE (ix86_compare_op0))
9220 tmp = ix86_expand_compare (code, NULL, NULL);
9221 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9222 gen_rtx_LABEL_REF (VOIDmode, label),
9224 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9233 enum rtx_code bypass_code, first_code, second_code;
9235 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9238 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9240 /* Check whether we will use the natural sequence with one jump. If
9241 so, we can expand the jump early. Otherwise delay expansion by
9242 creating a compound insn so as not to confuse optimizers. */
9243 if (bypass_code == NIL && second_code == NIL
9246 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9247 gen_rtx_LABEL_REF (VOIDmode, label),
9252 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9253 ix86_compare_op0, ix86_compare_op1);
9254 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9255 gen_rtx_LABEL_REF (VOIDmode, label),
9257 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9259 use_fcomi = ix86_use_fcomi_compare (code);
9260 vec = rtvec_alloc (3 + !use_fcomi);
9261 RTVEC_ELT (vec, 0) = tmp;
9263 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9265 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9268 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9270 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9278 /* Expand a DImode branch into a sequence of compare+branch insns. */
9280 rtx lo[2], hi[2], label2;
9281 enum rtx_code code1, code2, code3;
9283 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9285 tmp = ix86_compare_op0;
9286 ix86_compare_op0 = ix86_compare_op1;
9287 ix86_compare_op1 = tmp;
9288 code = swap_condition (code);
9290 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9291 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9293 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9294 avoid two branches. This costs one extra insn, so disable it when
9295 optimizing for size. */
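/* An illustrative sketch of the transformation (not compiler code):
   on a 32-bit target the 64-bit test

       a == b

   becomes

       ((hi (a) ^ hi (b)) | (lo (a) ^ lo (b))) == 0

   i.e. two xors, one or and a single compare against zero, instead
   of comparing the two halves with separate branches.  */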
9297 if ((code == EQ || code == NE)
9299 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9304 if (hi[1] != const0_rtx)
9305 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9306 NULL_RTX, 0, OPTAB_WIDEN);
9309 if (lo[1] != const0_rtx)
9310 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9311 NULL_RTX, 0, OPTAB_WIDEN);
9313 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9314 NULL_RTX, 0, OPTAB_WIDEN);
9316 ix86_compare_op0 = tmp;
9317 ix86_compare_op1 = const0_rtx;
9318 ix86_expand_branch (code, label);
9322 /* Otherwise, if we are doing a less-than or greater-or-equal
9323 comparison, op1 is a constant, and the low word is zero, then we
9324 can just examine the high word. */
9326 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9329 case LT: case LTU: case GE: case GEU:
9330 ix86_compare_op0 = hi[0];
9331 ix86_compare_op1 = hi[1];
9332 ix86_expand_branch (code, label);
9338 /* Otherwise, we need two or three jumps. */
9340 label2 = gen_label_rtx ();
9343 code2 = swap_condition (code);
9344 code3 = unsigned_condition (code);
9348 case LT: case GT: case LTU: case GTU:
9351 case LE: code1 = LT; code2 = GT; break;
9352 case GE: code1 = GT; code2 = LT; break;
9353 case LEU: code1 = LTU; code2 = GTU; break;
9354 case GEU: code1 = GTU; code2 = LTU; break;
9356 case EQ: code1 = NIL; code2 = NE; break;
9357 case NE: code2 = NIL; break;
9365 * if (hi(a) < hi(b)) goto true;
9366 * if (hi(a) > hi(b)) goto false;
9367 * if (lo(a) < lo(b)) goto true;
9371 ix86_compare_op0 = hi[0];
9372 ix86_compare_op1 = hi[1];
9375 ix86_expand_branch (code1, label);
9377 ix86_expand_branch (code2, label2);
9379 ix86_compare_op0 = lo[0];
9380 ix86_compare_op1 = lo[1];
9381 ix86_expand_branch (code3, label);
9384 emit_label (label2);
9393 /* Split a branch based on a floating point condition. */
9395 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
9396 rtx target1, rtx target2, rtx tmp)
9399 rtx label = NULL_RTX;
9401 int bypass_probability = -1, second_probability = -1, probability = -1;
9404 if (target2 != pc_rtx)
9407 code = reverse_condition_maybe_unordered (code);
9412 condition = ix86_expand_fp_compare (code, op1, op2,
9413 tmp, &second, &bypass);
9415 if (split_branch_probability >= 0)
9417 /* Distribute the probabilities across the jumps.
9418 Assume that the BYPASS and SECOND tests always check
9419 for UNORDERED. */
9420 probability = split_branch_probability;
9422 /* A value of 1 is low enough that there is no need for the
9423 probability to be updated. Later we may run some experiments and
9424 see whether unordered values are more frequent in practice. */
9426 bypass_probability = 1;
9428 second_probability = 1;
9430 if (bypass != NULL_RTX)
9432 label = gen_label_rtx ();
9433 i = emit_jump_insn (gen_rtx_SET
9435 gen_rtx_IF_THEN_ELSE (VOIDmode,
9437 gen_rtx_LABEL_REF (VOIDmode,
9440 if (bypass_probability >= 0)
9442 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9443 GEN_INT (bypass_probability),
9446 i = emit_jump_insn (gen_rtx_SET
9448 gen_rtx_IF_THEN_ELSE (VOIDmode,
9449 condition, target1, target2)));
9450 if (probability >= 0)
9452 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9453 GEN_INT (probability),
9455 if (second != NULL_RTX)
9457 i = emit_jump_insn (gen_rtx_SET
9459 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9461 if (second_probability >= 0)
9463 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9464 GEN_INT (second_probability),
9467 if (label != NULL_RTX)
9472 ix86_expand_setcc (enum rtx_code code, rtx dest)
9474 rtx ret, tmp, tmpreg, equiv;
9475 rtx second_test, bypass_test;
9477 if (GET_MODE (ix86_compare_op0) == DImode
9479 return 0; /* FAIL */
9481 if (GET_MODE (dest) != QImode)
9484 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9485 PUT_MODE (ret, QImode);
9490 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9491 if (bypass_test || second_test)
9493 rtx test = second_test;
9495 rtx tmp2 = gen_reg_rtx (QImode);
9502 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9504 PUT_MODE (test, QImode);
9505 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9508 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9510 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9513 /* Attach a REG_EQUAL note describing the comparison result. */
9514 equiv = simplify_gen_relational (code, QImode,
9515 GET_MODE (ix86_compare_op0),
9516 ix86_compare_op0, ix86_compare_op1);
9517 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
9519 return 1; /* DONE */
9522 /* Expand a comparison setting or clearing the carry flag. Return true
9523 when successful, and set *POP to the comparison operation. */
9525 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
9527 enum machine_mode mode =
9528 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9530 /* Do not handle DImode compares that go through the special path. Also
9531 we can't deal with FP compares yet. This is possible to add. */
9532 if (mode == DImode && !TARGET_64BIT)
9534 if (FLOAT_MODE_P (mode))
9536 rtx second_test = NULL, bypass_test = NULL;
9537 rtx compare_op, compare_seq;
9539 /* Shortcut: the following common codes never translate into carry flag compares. */
9540 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9541 || code == ORDERED || code == UNORDERED)
9544 /* These comparisons require the zero flag; swap operands so they won't. */
9545 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9551 code = swap_condition (code);
9554 /* Try to expand the comparison and verify that we end up with a carry
9555 flag based comparison. This fails only when we decide to expand the
9556 comparison using arithmetic, which is not a common scenario. */
9558 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9559 &second_test, &bypass_test);
9560 compare_seq = get_insns ();
9563 if (second_test || bypass_test)
9565 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9566 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9567 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9569 code = GET_CODE (compare_op);
9570 if (code != LTU && code != GEU)
9572 emit_insn (compare_seq);
9576 if (!INTEGRAL_MODE_P (mode))
9584 /* Convert a==0 into (unsigned)a<1. */
9587 if (op1 != const0_rtx)
9590 code = (code == EQ ? LTU : GEU);
9593 /* Convert a>b into b<a or a>=b+1. */
9596 if (GET_CODE (op1) == CONST_INT)
9598 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9599 /* Bail out on overflow. We could still swap the operands, but that
9600 would force loading of the constant into a register. */
9601 if (op1 == const0_rtx
9602 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9604 code = (code == GTU ? GEU : LTU);
9611 code = (code == GTU ? LTU : GEU);
9615 /* Convert a>=0 into (unsigned)a<0x80000000. */
9618 if (mode == DImode || op1 != const0_rtx)
9620 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9621 code = (code == LT ? GEU : LTU);
9625 if (mode == DImode || op1 != constm1_rtx)
9627 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9628 code = (code == LE ? GEU : LTU);
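/* Summary of the identities used above (illustrative, for 32-bit
   operands):

       a == 0   <->  (unsigned) a < 1
       a >  b   <->  b < a,  or  a >= b + 1  when b is a constant
       a >= 0   <->  (unsigned) a < 0x80000000
       a <= -1  <->  (unsigned) a >= 0x80000000

   Each right-hand side is an LTU/GEU test, which the hardware
   answers directly in the carry flag.  */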
9634 /* Swapping operands may cause a constant to appear as the first operand. */
9635 if (!nonimmediate_operand (op0, VOIDmode))
9639 op0 = force_reg (mode, op0);
9641 ix86_compare_op0 = op0;
9642 ix86_compare_op1 = op1;
9643 *pop = ix86_expand_compare (code, NULL, NULL);
9644 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9650 ix86_expand_int_movcc (rtx operands[])
9652 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9653 rtx compare_seq, compare_op;
9654 rtx second_test, bypass_test;
9655 enum machine_mode mode = GET_MODE (operands[0]);
9656 bool sign_bit_compare_p = false;
9659 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9660 compare_seq = get_insns ();
9663 compare_code = GET_CODE (compare_op);
9665 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9666 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9667 sign_bit_compare_p = true;
9669 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9670 HImode insns, we'd be swallowed in word prefix ops. */
9672 if ((mode != HImode || TARGET_FAST_PREFIX)
9673 && (mode != DImode || TARGET_64BIT)
9674 && GET_CODE (operands[2]) == CONST_INT
9675 && GET_CODE (operands[3]) == CONST_INT)
9677 rtx out = operands[0];
9678 HOST_WIDE_INT ct = INTVAL (operands[2]);
9679 HOST_WIDE_INT cf = INTVAL (operands[3]);
9683 /* Sign bit compares are better done using shifts than by using
9684 the sbb insn. */
9685 if (sign_bit_compare_p
9686 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9687 ix86_compare_op1, &compare_op))
9689 /* Detect overlap between destination and compare sources. */
9692 if (!sign_bit_compare_p)
9696 compare_code = GET_CODE (compare_op);
9698 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9699 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9702 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9705 /* To simplify the rest of the code, restrict to the GEU case. */
9706 if (compare_code == LTU)
9708 HOST_WIDE_INT tmp = ct;
9711 compare_code = reverse_condition (compare_code);
9712 code = reverse_condition (code);
9717 PUT_CODE (compare_op,
9718 reverse_condition_maybe_unordered
9719 (GET_CODE (compare_op)));
9721 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9725 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9726 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9727 tmp = gen_reg_rtx (mode);
9730 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9732 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9736 if (code == GT || code == GE)
9737 code = reverse_condition (code);
9740 HOST_WIDE_INT tmp = ct;
9745 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9746 ix86_compare_op1, VOIDmode, 0, -1);
9759 tmp = expand_simple_binop (mode, PLUS,
9761 copy_rtx (tmp), 1, OPTAB_DIRECT);
9772 tmp = expand_simple_binop (mode, IOR,
9774 copy_rtx (tmp), 1, OPTAB_DIRECT);
9776 else if (diff == -1 && ct)
9786 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9788 tmp = expand_simple_binop (mode, PLUS,
9789 copy_rtx (tmp), GEN_INT (cf),
9790 copy_rtx (tmp), 1, OPTAB_DIRECT);
9798 * andl cf - ct, dest
9808 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9811 tmp = expand_simple_binop (mode, AND,
9813 gen_int_mode (cf - ct, mode),
9814 copy_rtx (tmp), 1, OPTAB_DIRECT);
9816 tmp = expand_simple_binop (mode, PLUS,
9817 copy_rtx (tmp), GEN_INT (ct),
9818 copy_rtx (tmp), 1, OPTAB_DIRECT);
9821 if (!rtx_equal_p (tmp, out))
9822 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9824 return 1; /* DONE */
9830 tmp = ct, ct = cf, cf = tmp;
9832 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9834 /* We may be reversing an unordered compare to a normal compare, which
9835 is not valid in general (we may convert a non-trapping condition
9836 into a trapping one); however, on i386 we currently emit all
9837 comparisons unordered. */
9838 compare_code = reverse_condition_maybe_unordered (compare_code);
9839 code = reverse_condition_maybe_unordered (code);
9843 compare_code = reverse_condition (compare_code);
9844 code = reverse_condition (code);
9849 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9850 && GET_CODE (ix86_compare_op1) == CONST_INT)
9852 if (ix86_compare_op1 == const0_rtx
9853 && (code == LT || code == GE))
9854 compare_code = code;
9855 else if (ix86_compare_op1 == constm1_rtx)
9859 else if (code == GT)
9864 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9865 if (compare_code != NIL
9866 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9867 && (cf == -1 || ct == -1))
9869 /* If the lea code below could be used, only optimize
9870 if it results in a 2-insn sequence. */
9872 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9873 || diff == 3 || diff == 5 || diff == 9)
9874 || (compare_code == LT && ct == -1)
9875 || (compare_code == GE && cf == -1))
9878 * notl op1 (if necessary)
9886 code = reverse_condition (code);
9889 out = emit_store_flag (out, code, ix86_compare_op0,
9890 ix86_compare_op1, VOIDmode, 0, -1);
9892 out = expand_simple_binop (mode, IOR,
9894 out, 1, OPTAB_DIRECT);
9895 if (out != operands[0])
9896 emit_move_insn (operands[0], out);
9898 return 1; /* DONE */
9903 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9904 || diff == 3 || diff == 5 || diff == 9)
9905 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9906 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
9912 * lea cf(dest*(ct-cf)),dest
9916 * This also catches the degenerate setcc-only case.
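*
* I.e. (an illustrative sketch): dest = cf + (cond ? 1 : 0) * diff,
* where diff = ct - cf is one of 1, 2, 3, 4, 5, 8 or 9, so that a
* single lea of the form base + index*scale (scale 1, 2, 4 or 8)
* can compute it.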
9922 out = emit_store_flag (out, code, ix86_compare_op0,
9923 ix86_compare_op1, VOIDmode, 0, 1);
9926 /* On x86_64 the lea instruction operates on Pmode, so we need
9927 to get the arithmetic done in the proper mode to match. */
9929 tmp = copy_rtx (out);
9933 out1 = copy_rtx (out);
9934 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9938 tmp = gen_rtx_PLUS (mode, tmp, out1);
9944 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9947 if (!rtx_equal_p (tmp, out))
9950 out = force_operand (tmp, copy_rtx (out));
9952 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
9954 if (!rtx_equal_p (out, operands[0]))
9955 emit_move_insn (operands[0], copy_rtx (out));
9957 return 1; /* DONE */
9961 * General case: Jumpful:
9962 * xorl dest,dest cmpl op1, op2
9963 * cmpl op1, op2 movl ct, dest
9965 * decl dest movl cf, dest
9966 * andl (cf-ct),dest 1:
9971 * This is reasonably steep, but branch mispredict costs are
9972 * high on modern cpus, so consider failing only if optimizing
9973 * for space. */
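/* An illustrative C equivalent of the jumpless column above (a
   sketch, not compiler output):

       int sel (int cond, int ct, int cf)
       {
         int t = (cond != 0);   -- setcc: 0 or 1
         t -= 1;                -- decl: -1 when false, 0 when true
         t &= cf - ct;          -- andl
         return t + ct;         -- addl: cf when false, ct when true
       }
*/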
9976 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9977 && BRANCH_COST >= 2)
9983 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9984 /* We may be reversing an unordered compare to a normal compare,
9985 which is not valid in general (we may convert a non-trapping
9986 condition into a trapping one); however, on i386 we currently
9987 emit all comparisons unordered. */
9988 code = reverse_condition_maybe_unordered (code);
9991 code = reverse_condition (code);
9992 if (compare_code != NIL)
9993 compare_code = reverse_condition (compare_code);
9997 if (compare_code != NIL)
9999 /* notl op1 (if needed)
10004 For x < 0 (resp. x <= -1) there will be no notl,
10005 so if possible swap the constants to get rid of the
10006 complement.
10007 True/false will be -1/0 while the code below (store flag
10008 followed by decrement) is 0/-1, so the constants need
10009 to be exchanged once more. */
10011 if (compare_code == GE || !cf)
10013 code = reverse_condition (code);
10018 HOST_WIDE_INT tmp = cf;
10023 out = emit_store_flag (out, code, ix86_compare_op0,
10024 ix86_compare_op1, VOIDmode, 0, -1);
10028 out = emit_store_flag (out, code, ix86_compare_op0,
10029 ix86_compare_op1, VOIDmode, 0, 1);
10031 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
10032 copy_rtx (out), 1, OPTAB_DIRECT);
10035 out = expand_simple_binop (mode, AND, copy_rtx (out),
10036 gen_int_mode (cf - ct, mode),
10037 copy_rtx (out), 1, OPTAB_DIRECT);
10039 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
10040 copy_rtx (out), 1, OPTAB_DIRECT);
10041 if (!rtx_equal_p (out, operands[0]))
10042 emit_move_insn (operands[0], copy_rtx (out));
10044 return 1; /* DONE */
10048 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10050 /* Try a few more things with specific constants and a variable. */
10053 rtx var, orig_out, out, tmp;
10055 if (BRANCH_COST <= 2)
10056 return 0; /* FAIL */
10058 /* If one of the two operands is an interesting constant, load a
10059 constant with the above and mask it in with a logical operation. */
10061 if (GET_CODE (operands[2]) == CONST_INT)
10064 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
10065 operands[3] = constm1_rtx, op = and_optab;
10066 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
10067 operands[3] = const0_rtx, op = ior_optab;
10069 return 0; /* FAIL */
10071 else if (GET_CODE (operands[3]) == CONST_INT)
10074 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
10075 operands[2] = constm1_rtx, op = and_optab;
10076 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
10077 operands[2] = const0_rtx, op = ior_optab;
10079 return 0; /* FAIL */
10082 return 0; /* FAIL */
10084 orig_out = operands[0];
10085 tmp = gen_reg_rtx (mode);
10088 /* Recurse to get the constant loaded. */
10089 if (ix86_expand_int_movcc (operands) == 0)
10090 return 0; /* FAIL */
10092 /* Mask in the interesting variable. */
10093 out = expand_binop (mode, op, var, tmp, orig_out, 0,
10095 if (!rtx_equal_p (out, orig_out))
10096 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
10098 return 1; /* DONE */
10102 * For comparison with above,
10112 if (! nonimmediate_operand (operands[2], mode))
10113 operands[2] = force_reg (mode, operands[2]);
10114 if (! nonimmediate_operand (operands[3], mode))
10115 operands[3] = force_reg (mode, operands[3]);
10117 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10119 rtx tmp = gen_reg_rtx (mode);
10120 emit_move_insn (tmp, operands[3]);
10123 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10125 rtx tmp = gen_reg_rtx (mode);
10126 emit_move_insn (tmp, operands[2]);
10130 if (! register_operand (operands[2], VOIDmode)
10132 || ! register_operand (operands[3], VOIDmode)))
10133 operands[2] = force_reg (mode, operands[2]);
10136 && ! register_operand (operands[3], VOIDmode))
10137 operands[3] = force_reg (mode, operands[3]);
10139 emit_insn (compare_seq);
10140 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10141 gen_rtx_IF_THEN_ELSE (mode,
10142 compare_op, operands[2],
10145 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10146 gen_rtx_IF_THEN_ELSE (mode,
10148 copy_rtx (operands[3]),
10149 copy_rtx (operands[0]))));
10151 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10152 gen_rtx_IF_THEN_ELSE (mode,
10154 copy_rtx (operands[2]),
10155 copy_rtx (operands[0]))));
10157 return 1; /* DONE */
10161 ix86_expand_fp_movcc (rtx operands[])
10163 enum rtx_code code;
10165 rtx compare_op, second_test, bypass_test;
10167 /* For SF/DFmode conditional moves based on comparisons
10168 in the same mode, we may want to use SSE min/max instructions. */
10169 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
10170 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
10171 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
10172 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
10173 && (!TARGET_IEEE_FP
10174 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
10175 /* We may be called from the post-reload splitter. */
10176 && (!REG_P (operands[0])
10177 || SSE_REG_P (operands[0])
10178 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
10180 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
10181 code = GET_CODE (operands[1]);
10183 /* See if we have a (cross) match between the comparison operands and
10184 the conditional move operands. */
10185 if (rtx_equal_p (operands[2], op1))
10190 code = reverse_condition_maybe_unordered (code);
10192 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
10194 /* Check for min operation. */
10195 if (code == LT || code == UNLE)
10203 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10204 if (memory_operand (op0, VOIDmode))
10205 op0 = force_reg (GET_MODE (operands[0]), op0);
10206 if (GET_MODE (operands[0]) == SFmode)
10207 emit_insn (gen_minsf3 (operands[0], op0, op1));
10209 emit_insn (gen_mindf3 (operands[0], op0, op1));
10212 /* Check for max operation. */
10213 if (code == GT || code == UNGE)
10221 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10222 if (memory_operand (op0, VOIDmode))
10223 op0 = force_reg (GET_MODE (operands[0]), op0);
10224 if (GET_MODE (operands[0]) == SFmode)
10225 emit_insn (gen_maxsf3 (operands[0], op0, op1));
10227 emit_insn (gen_maxdf3 (operands[0], op0, op1));
10231 /* Manage the condition to be sse_comparison_operator. In case we are
10232 in non-ieee mode, try to canonicalize the destination operand
10233 to be first in the comparison - this helps reload to avoid extra
10234 moves. */
10235 if (!sse_comparison_operator (operands[1], VOIDmode)
10236 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
10238 rtx tmp = ix86_compare_op0;
10239 ix86_compare_op0 = ix86_compare_op1;
10240 ix86_compare_op1 = tmp;
10241 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
10242 VOIDmode, ix86_compare_op0,
10245 /* Similarly try to manage the result to be the first operand of the
10246 conditional move. We also don't support the NE comparison on SSE,
10247 so try to avoid it. */
10248 if ((rtx_equal_p (operands[0], operands[3])
10249 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
10250 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
10252 rtx tmp = operands[2];
10253 operands[2] = operands[3];
10255 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10256 (GET_CODE (operands[1])),
10257 VOIDmode, ix86_compare_op0,
10260 if (GET_MODE (operands[0]) == SFmode)
10261 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
10262 operands[2], operands[3],
10263 ix86_compare_op0, ix86_compare_op1));
10265 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
10266 operands[2], operands[3],
10267 ix86_compare_op0, ix86_compare_op1));
10271 /* The floating point conditional move instructions don't directly
10272 support conditions resulting from a signed integer comparison. */
10274 code = GET_CODE (operands[1]);
10275 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10280 if (!fcmov_comparison_operator (compare_op, VOIDmode))
10282 if (second_test != NULL || bypass_test != NULL)
10284 tmp = gen_reg_rtx (QImode);
10285 ix86_expand_setcc (code, tmp);
10287 ix86_compare_op0 = tmp;
10288 ix86_compare_op1 = const0_rtx;
10289 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10291 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10293 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10294 emit_move_insn (tmp, operands[3]);
10297 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10299 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10300 emit_move_insn (tmp, operands[2]);
10304 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10305 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10310 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10311 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10316 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10317 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10325 /* Expand a conditional increment or decrement using adc/sbb instructions.
10326 The default case using setcc followed by the conditional move can be
10327 done by generic code. */
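/* For example (an illustrative sketch in AT&T syntax, assuming an
   unsigned comparison whose result lands in the carry flag):

       r += (a < b);   ->   cmpl %ebx, %eax ; adcl $0, %ecx
       r -= (a < b);   ->   cmpl %ebx, %eax ; sbbl $0, %ecx  */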
10329 ix86_expand_int_addcc (rtx operands[])
10331 enum rtx_code code = GET_CODE (operands[1]);
10333 rtx val = const0_rtx;
10334 bool fpcmp = false;
10335 enum machine_mode mode = GET_MODE (operands[0]);
10337 if (operands[3] != const1_rtx
10338 && operands[3] != constm1_rtx)
10340 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10341 ix86_compare_op1, &compare_op))
10343 code = GET_CODE (compare_op);
10345 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10346 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10349 code = ix86_fp_compare_code_to_integer (code);
10356 PUT_CODE (compare_op,
10357 reverse_condition_maybe_unordered
10358 (GET_CODE (compare_op)));
10360 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10362 PUT_MODE (compare_op, mode);
10364 /* Construct either adc or sbb insn. */
10365 if ((code == LTU) == (operands[3] == constm1_rtx))
10367 switch (GET_MODE (operands[0]))
10370 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10373 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10376 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10379 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10387 switch (GET_MODE (operands[0]))
10390 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10393 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10396 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10399 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10405 return 1; /* DONE */
10409 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10410 works for floating point parameters and non-offsettable memories.
10411 For pushes, it returns just stack offsets; the values will be saved
10412 in the right order. At most three parts are generated. */
10415 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
10420 size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
10422 size = (GET_MODE_SIZE (mode) + 4) / 8;
10424 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10426 if (size < 2 || size > 3)
10429 /* Optimize constant pool references to immediates. This is used by fp
10430 moves, which force all constants to memory to allow combining. */
10431 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10433 rtx tmp = maybe_get_pool_constant (operand);
10438 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10440 /* The only non-offsettable memories we handle are pushes. */
10441 if (! push_operand (operand, VOIDmode))
10444 operand = copy_rtx (operand);
10445 PUT_MODE (operand, Pmode);
10446 parts[0] = parts[1] = parts[2] = operand;
10448 else if (!TARGET_64BIT)
10450 if (mode == DImode)
10451 split_di (&operand, 1, &parts[0], &parts[1]);
10454 if (REG_P (operand))
10456 if (!reload_completed)
10458 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10459 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10461 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10463 else if (offsettable_memref_p (operand))
10465 operand = adjust_address (operand, SImode, 0);
10466 parts[0] = operand;
10467 parts[1] = adjust_address (operand, SImode, 4);
10469 parts[2] = adjust_address (operand, SImode, 8);
10471 else if (GET_CODE (operand) == CONST_DOUBLE)
10476 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10480 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10481 parts[2] = gen_int_mode (l[2], SImode);
10484 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10489 parts[1] = gen_int_mode (l[1], SImode);
10490 parts[0] = gen_int_mode (l[0], SImode);
10498 if (mode == TImode)
10499 split_ti (&operand, 1, &parts[0], &parts[1]);
10500 if (mode == XFmode || mode == TFmode)
10502 enum machine_mode upper_mode = mode == XFmode ? SImode : DImode;
10503 if (REG_P (operand))
10505 if (!reload_completed)
10507 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10508 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
10510 else if (offsettable_memref_p (operand))
10512 operand = adjust_address (operand, DImode, 0);
10513 parts[0] = operand;
10514 parts[1] = adjust_address (operand, upper_mode, 8);
10516 else if (GET_CODE (operand) == CONST_DOUBLE)
10521 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10522 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10523 /* Do not use a shift by 32, to avoid a warning on 32-bit systems. */
10524 if (HOST_BITS_PER_WIDE_INT >= 64)
10527 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10528 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10531 parts[0] = immed_double_const (l[0], l[1], DImode);
10532 if (upper_mode == SImode)
10533 parts[1] = gen_int_mode (l[2], SImode);
10534 else if (HOST_BITS_PER_WIDE_INT >= 64)
10537 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
10538 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
10541 parts[1] = immed_double_const (l[2], l[3], DImode);
10551 /* Emit insns to perform a move or push of DI, DF, and XF values.
10552 Return false when normal moves are needed; true when all required
10553 insns have been emitted. Operands 2-4 contain the input values
10554 in the correct order; operands 5-7 contain the output values. */
10557 ix86_split_long_move (rtx operands[])
10562 int collisions = 0;
10563 enum machine_mode mode = GET_MODE (operands[0]);
10565 /* The DFmode expanders may ask us to move a double.
10566 For a 64-bit target this is a single move. By hiding that fact
10567 here we simplify the i386.md splitters. */
10568 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10570 /* Optimize constant pool references to immediates. This is used by
10571 fp moves, which force all constants to memory to allow combining. */
10573 if (GET_CODE (operands[1]) == MEM
10574 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10575 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10576 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10577 if (push_operand (operands[0], VOIDmode))
10579 operands[0] = copy_rtx (operands[0]);
10580 PUT_MODE (operands[0], Pmode);
10583 operands[0] = gen_lowpart (DImode, operands[0]);
10584 operands[1] = gen_lowpart (DImode, operands[1]);
10585 emit_move_insn (operands[0], operands[1]);
10589 /* The only non-offsettable memory we handle is a push. */
10590 if (push_operand (operands[0], VOIDmode))
10592 else if (GET_CODE (operands[0]) == MEM
10593 && ! offsettable_memref_p (operands[0]))
10596 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10597 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10599 /* When emitting a push, take care of source operands on the stack. */
10600 if (push && GET_CODE (operands[1]) == MEM
10601 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10604 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10605 XEXP (part[1][2], 0));
10606 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10607 XEXP (part[1][1], 0));
10610 /* We need to do the copy in the right order in case an address register
10611 of the source overlaps the destination. */
10612 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10614 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10616 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10619 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10622 /* A collision in the middle part can be handled by reordering. */
10623 if (collisions == 1 && nparts == 3
10624 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10627 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10628 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10631 /* If there are more collisions, we can't handle them by reordering.
10632 Do an lea to the last part and use only one colliding move. */
10633 else if (collisions > 1)
10639 base = part[0][nparts - 1];
10641 /* Handle the case when the last part isn't valid for lea.
10642 This happens in 64-bit mode when storing the 12-byte XFmode. */
10643 if (GET_MODE (base) != Pmode)
10644 base = gen_rtx_REG (Pmode, REGNO (base));
10646 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10647 part[1][0] = replace_equiv_address (part[1][0], base);
10648 part[1][1] = replace_equiv_address (part[1][1],
10649 plus_constant (base, UNITS_PER_WORD));
10651 part[1][2] = replace_equiv_address (part[1][2],
10652 plus_constant (base, 8));
10662 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
10663 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
10664 emit_move_insn (part[0][2], part[1][2]);
10669 /* In 64-bit mode we don't have a 32-bit push available. If this is a
10670 register, that is OK - we will just use the larger counterpart. We
10671 also retype the memory - this comes from an attempt to avoid a REX
10672 prefix when moving the second half of a TFmode value. */
10673 if (GET_MODE (part[1][1]) == SImode)
10675 if (GET_CODE (part[1][1]) == MEM)
10676 part[1][1] = adjust_address (part[1][1], DImode, 0);
10677 else if (REG_P (part[1][1]))
10678 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10681 if (GET_MODE (part[1][0]) == SImode)
10682 part[1][0] = part[1][1];
10685 emit_move_insn (part[0][1], part[1][1]);
10686 emit_move_insn (part[0][0], part[1][0]);
10690 /* Choose the correct order so as not to overwrite the source before it is copied. */
10691 if ((REG_P (part[0][0])
10692 && REG_P (part[1][1])
10693 && (REGNO (part[0][0]) == REGNO (part[1][1])
10695 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10697 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10701 operands[2] = part[0][2];
10702 operands[3] = part[0][1];
10703 operands[4] = part[0][0];
10704 operands[5] = part[1][2];
10705 operands[6] = part[1][1];
10706 operands[7] = part[1][0];
10710 operands[2] = part[0][1];
10711 operands[3] = part[0][0];
10712 operands[5] = part[1][1];
10713 operands[6] = part[1][0];
10720 operands[2] = part[0][0];
10721 operands[3] = part[0][1];
10722 operands[4] = part[0][2];
10723 operands[5] = part[1][0];
10724 operands[6] = part[1][1];
10725 operands[7] = part[1][2];
10729 operands[2] = part[0][0];
10730 operands[3] = part[0][1];
10731 operands[5] = part[1][0];
10732 operands[6] = part[1][1];
10735 emit_move_insn (operands[2], operands[5]);
10736 emit_move_insn (operands[3], operands[6]);
10738 emit_move_insn (operands[4], operands[7]);
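/* Illustrative semantics of the DImode left-shift split below (a
   sketch, not compiler code), for a 32-bit target and c = count & 63:

       c >= 32:  high = low << (c - 32);                   low = 0;
       c <  32:  high = (high << c) | (low >> (32 - c));   low <<= c;

   shld computes the high word of the c < 32 case in one insn.  */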
10744 ix86_split_ashldi (rtx *operands, rtx scratch)
10746 rtx low[2], high[2];
10749 if (GET_CODE (operands[2]) == CONST_INT)
10751 split_di (operands, 2, low, high);
10752 count = INTVAL (operands[2]) & 63;
10756 emit_move_insn (high[0], low[1]);
10757 emit_move_insn (low[0], const0_rtx);
10760 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10764 if (!rtx_equal_p (operands[0], operands[1]))
10765 emit_move_insn (operands[0], operands[1]);
10766 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10767 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10772 if (!rtx_equal_p (operands[0], operands[1]))
10773 emit_move_insn (operands[0], operands[1]);
10775 split_di (operands, 1, low, high);
10777 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10778 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10780 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10782 if (! no_new_pseudos)
10783 scratch = force_reg (SImode, const0_rtx);
10785 emit_move_insn (scratch, const0_rtx);
10787 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10791 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10796 ix86_split_ashrdi (rtx *operands, rtx scratch)
10798 rtx low[2], high[2];
10801 if (GET_CODE (operands[2]) == CONST_INT)
10803 split_di (operands, 2, low, high);
10804 count = INTVAL (operands[2]) & 63;
10808 emit_move_insn (low[0], high[1]);
10810 if (! reload_completed)
10811 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10814 emit_move_insn (high[0], low[0]);
10815 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10819 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10823 if (!rtx_equal_p (operands[0], operands[1]))
10824 emit_move_insn (operands[0], operands[1]);
10825 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10826 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10831 if (!rtx_equal_p (operands[0], operands[1]))
10832 emit_move_insn (operands[0], operands[1]);
10834 split_di (operands, 1, low, high);
10836 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10837 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10839 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10841 if (! no_new_pseudos)
10842 scratch = gen_reg_rtx (SImode);
10843 emit_move_insn (scratch, high[0]);
10844 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10845 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10849 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10854 ix86_split_lshrdi (rtx *operands, rtx scratch)
10856 rtx low[2], high[2];
10859 if (GET_CODE (operands[2]) == CONST_INT)
10861 split_di (operands, 2, low, high);
10862 count = INTVAL (operands[2]) & 63;
10866 emit_move_insn (low[0], high[1]);
10867 emit_move_insn (high[0], const0_rtx);
10870 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10874 if (!rtx_equal_p (operands[0], operands[1]))
10875 emit_move_insn (operands[0], operands[1]);
10876 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10877 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10882 if (!rtx_equal_p (operands[0], operands[1]))
10883 emit_move_insn (operands[0], operands[1]);
10885 split_di (operands, 1, low, high);
10887 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10888 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10890 /* Heh. By reversing the arguments, we can reuse this pattern. */
10891 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10893 if (! no_new_pseudos)
10894 scratch = force_reg (SImode, const0_rtx);
10896 emit_move_insn (scratch, const0_rtx);
10898 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10902 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10906 /* Helper function for the string operations below. Test whether
10907 (VARIABLE & VALUE) is zero; if so, jump to the returned label. */
10909 ix86_expand_aligntest (rtx variable, int value)
10911 rtx label = gen_label_rtx ();
10912 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10913 if (GET_MODE (variable) == DImode)
10914 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10916 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10917 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10922 /* Decrease COUNTREG by VALUE. */
10924 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
10926 if (GET_MODE (countreg) == DImode)
10927 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10929 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10932 /* Zero-extend the possibly-SImode EXP into a Pmode register. */
10934 ix86_zero_extend_to_Pmode (rtx exp)
10937 if (GET_MODE (exp) == VOIDmode)
10938 return force_reg (Pmode, exp);
10939 if (GET_MODE (exp) == Pmode)
10940 return copy_to_mode_reg (Pmode, exp);
10941 r = gen_reg_rtx (Pmode);
10942 emit_insn (gen_zero_extendsidi2 (r, exp));
10946 /* Expand string move (memcpy) operation. Use i386 string operations when
10947 profitable. expand_clrstr contains similar code. */
10949 ix86_expand_movstr (rtx dst, rtx src, rtx count_exp, rtx align_exp)
10951 rtx srcreg, destreg, countreg;
10952 enum machine_mode counter_mode;
10953 HOST_WIDE_INT align = 0;
10954 unsigned HOST_WIDE_INT count = 0;
10957 if (GET_CODE (align_exp) == CONST_INT)
10958 align = INTVAL (align_exp);
10960 /* Can't use any of this if the user has appropriated esi or edi. */
10961 if (global_regs[4] || global_regs[5])
10964 /* This simple hack avoids all inlining code and simplifies code below. */
10965 if (!TARGET_ALIGN_STRINGOPS)
10968 if (GET_CODE (count_exp) == CONST_INT)
10970 count = INTVAL (count_exp);
10971 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
10975 /* Figure out the proper mode for the counter. For 32 bits it is
10976 always SImode; for 64 bits use SImode when possible, otherwise DImode.
10977 Set count to the number of bytes copied when known at compile time. */
10978 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10979 || x86_64_zero_extended_value (count_exp))
10980 counter_mode = SImode;
10982 counter_mode = DImode;
10986 if (counter_mode != SImode && counter_mode != DImode)
10989 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10990 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10992 emit_insn (gen_cld ());
10994 /* When optimizing for size, emit a simple rep ; movsb instruction for
10995 counts not divisible by 4. */
10997 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10999 countreg = ix86_zero_extend_to_Pmode (count_exp);
11001 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
11002 destreg, srcreg, countreg));
11004 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
11005 destreg, srcreg, countreg));
11008 /* For constant aligned (or small unaligned) copies use rep movsl
11009 followed by code copying the rest. For PentiumPro ensure 8 byte
11010 alignment to allow rep movsl acceleration. */
11012 else if (count != 0
11014 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11015 || optimize_size || count < (unsigned int) 64))
11017 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11018 if (count & ~(size - 1))
11020 countreg = copy_to_mode_reg (counter_mode,
11021 GEN_INT ((count >> (size == 4 ? 2 : 3))
11022 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11023 countreg = ix86_zero_extend_to_Pmode (countreg);
11027 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
11028 destreg, srcreg, countreg));
11030 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
11031 destreg, srcreg, countreg));
11034 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
11035 destreg, srcreg, countreg));
11037 if (size == 8 && (count & 0x04))
11038 emit_insn (gen_strmovsi (destreg, srcreg));
11040 emit_insn (gen_strmovhi (destreg, srcreg));
11042 emit_insn (gen_strmovqi (destreg, srcreg));
11044 /* The generic code based on the glibc implementation:
11045 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
11046 allowing accelerated copying there)
11047 - copy the data using rep movsl
11048 - copy the rest. */
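/* Roughly, the straight-line code emitted below corresponds to this
   C sketch (illustrative only; no loops are used, as explained in
   the comment further down):

       if (dst & 1) { copy 1 byte;  n -= 1; }
       if (dst & 2) { copy 2 bytes; n -= 2; }
       -- rep movsl copies n >> 2 words --
       if (n & 2)   copy 2 bytes;
       if (n & 1)   copy 1 byte;  */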
11053 int desired_alignment = (TARGET_PENTIUMPRO
11054 && (count == 0 || count >= (unsigned int) 260)
11055 ? 8 : UNITS_PER_WORD);
11057 /* In case we don't know anything about the alignment, default to
11058 the library version, since it is usually equally fast and results in
11059 shorter code.
11061 Also emit a call when we know that the count is large and the call
11062 overhead will not be important. */
11063 if (!TARGET_INLINE_ALL_STRINGOPS
11064 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11070 if (TARGET_SINGLE_STRINGOP)
11071 emit_insn (gen_cld ());
11073 countreg2 = gen_reg_rtx (Pmode);
11074 countreg = copy_to_mode_reg (counter_mode, count_exp);
11076 /* We don't use loops to align destination and to copy parts smaller
11077 than 4 bytes, because gcc is able to optimize such code better (in
11078 the case the destination or the count really is aligned, gcc is often
11079 able to predict the branches) and also it is friendlier to the
11080 hardware branch prediction.
11082 Using loops is beneficial for the generic case, because we can
11083 handle small counts using the loops. Many CPUs (such as the Athlon)
11084 have large REP prefix setup costs.
11086 This is quite costly. Maybe we can revisit this decision later or
11087 add some customizability to this code. */
11089 if (count == 0 && align < desired_alignment)
11091 label = gen_label_rtx ();
11092 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11093 LEU, 0, counter_mode, 1, label);
11097 rtx label = ix86_expand_aligntest (destreg, 1);
11098 emit_insn (gen_strmovqi (destreg, srcreg));
11099 ix86_adjust_counter (countreg, 1);
11100 emit_label (label);
11101 LABEL_NUSES (label) = 1;
11105 rtx label = ix86_expand_aligntest (destreg, 2);
11106 emit_insn (gen_strmovhi (destreg, srcreg));
11107 ix86_adjust_counter (countreg, 2);
11108 emit_label (label);
11109 LABEL_NUSES (label) = 1;
11111 if (align <= 4 && desired_alignment > 4)
11113 rtx label = ix86_expand_aligntest (destreg, 4);
11114 emit_insn (gen_strmovsi (destreg, srcreg));
11115 ix86_adjust_counter (countreg, 4);
11116 emit_label (label);
11117 LABEL_NUSES (label) = 1;
11120 if (label && desired_alignment > 4 && !TARGET_64BIT)
11122 emit_label (label);
11123 LABEL_NUSES (label) = 1;
11126 if (!TARGET_SINGLE_STRINGOP)
11127 emit_insn (gen_cld ());
11130 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11132 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
11133 destreg, srcreg, countreg2));
11137 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
11138 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
11139 destreg, srcreg, countreg2));
11144 emit_label (label);
11145 LABEL_NUSES (label) = 1;
11147 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11148 emit_insn (gen_strmovsi (destreg, srcreg));
11149 if ((align <= 4 || count == 0) && TARGET_64BIT)
11151 rtx label = ix86_expand_aligntest (countreg, 4);
11152 emit_insn (gen_strmovsi (destreg, srcreg));
11153 emit_label (label);
11154 LABEL_NUSES (label) = 1;
11156 if (align > 2 && count != 0 && (count & 2))
11157 emit_insn (gen_strmovhi (destreg, srcreg));
11158 if (align <= 2 || count == 0)
11160 rtx label = ix86_expand_aligntest (countreg, 2);
11161 emit_insn (gen_strmovhi (destreg, srcreg));
11162 emit_label (label);
11163 LABEL_NUSES (label) = 1;
11165 if (align > 1 && count != 0 && (count & 1))
11166 emit_insn (gen_strmovqi (destreg, srcreg));
11167 if (align <= 1 || count == 0)
11169 rtx label = ix86_expand_aligntest (countreg, 1);
11170 emit_insn (gen_strmovqi (destreg, srcreg));
11171 emit_label (label);
11172 LABEL_NUSES (label) = 1;
11176 insns = get_insns ();
11179 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
11184 /* Expand string clear operation (bzero). Use i386 string operations when
11185 profitable. expand_movstr contains similar code. */
11187 ix86_expand_clrstr (rtx src, rtx count_exp, rtx align_exp)
11189 rtx destreg, zeroreg, countreg;
11190 enum machine_mode counter_mode;
11191 HOST_WIDE_INT align = 0;
11192 unsigned HOST_WIDE_INT count = 0;
11194 if (GET_CODE (align_exp) == CONST_INT)
11195 align = INTVAL (align_exp);
11197 /* Can't use any of this if the user has appropriated esi. */
11198 if (global_regs[4])
11201 /* This simple hack avoids all inlining code and simplifies code below. */
11202 if (!TARGET_ALIGN_STRINGOPS)
11205 if (GET_CODE (count_exp) == CONST_INT)
11207 count = INTVAL (count_exp);
11208 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11211 /* Figure out the proper mode for the counter. For 32 bits it is
11212 always SImode; for 64 bits use SImode when possible, otherwise DImode.
11213 Set count to the number of bytes cleared when known at compile time. */
11214 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11215 || x86_64_zero_extended_value (count_exp))
11216 counter_mode = SImode;
11218 counter_mode = DImode;
11220 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11222 emit_insn (gen_cld ());
11224 /* When optimizing for size, emit a simple rep ; stosb instruction for
11225 counts not divisible by 4. */
11227 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11229 countreg = ix86_zero_extend_to_Pmode (count_exp);
11230 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11232 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
11233 destreg, countreg));
11235 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
11236 destreg, countreg));
11238 else if (count != 0
11240 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11241 || optimize_size || count < (unsigned int) 64))
11243 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11244 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11245 if (count & ~(size - 1))
11247 countreg = copy_to_mode_reg (counter_mode,
11248 GEN_INT ((count >> (size == 4 ? 2 : 3))
11249 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11250 countreg = ix86_zero_extend_to_Pmode (countreg);
11254 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
11255 destreg, countreg));
11257 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
11258 destreg, countreg));
11261 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
11262 destreg, countreg));
11264 if (size == 8 && (count & 0x04))
11265 emit_insn (gen_strsetsi (destreg,
11266 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11268 emit_insn (gen_strsethi (destreg,
11269 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11271 emit_insn (gen_strsetqi (destreg,
11272 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11278 /* Compute desired alignment of the string operation. */
11279 int desired_alignment = (TARGET_PENTIUMPRO
11280 && (count == 0 || count >= (unsigned int) 260)
11281 ? 8 : UNITS_PER_WORD);
11283 /* In case we don't know anything about the alignment, default to
11284 library version, since it is usually equally fast and results in
11285 shorter code.
11287 Also emit a call when we know that the count is large and the call
11288 overhead will not be important. */
11289 if (!TARGET_INLINE_ALL_STRINGOPS
11290 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11293 if (TARGET_SINGLE_STRINGOP)
11294 emit_insn (gen_cld ());
11296 countreg2 = gen_reg_rtx (Pmode);
11297 countreg = copy_to_mode_reg (counter_mode, count_exp);
11298 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11300 if (count == 0 && align < desired_alignment)
11302 label = gen_label_rtx ();
11303 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11304 LEU, 0, counter_mode, 1, label);
11308 rtx label = ix86_expand_aligntest (destreg, 1);
11309 emit_insn (gen_strsetqi (destreg,
11310 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11311 ix86_adjust_counter (countreg, 1);
11312 emit_label (label);
11313 LABEL_NUSES (label) = 1;
11317 rtx label = ix86_expand_aligntest (destreg, 2);
11318 emit_insn (gen_strsethi (destreg,
11319 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11320 ix86_adjust_counter (countreg, 2);
11321 emit_label (label);
11322 LABEL_NUSES (label) = 1;
11324 if (align <= 4 && desired_alignment > 4)
11326 rtx label = ix86_expand_aligntest (destreg, 4);
11327 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
11328 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11330 ix86_adjust_counter (countreg, 4);
11331 emit_label (label);
11332 LABEL_NUSES (label) = 1;
11335 if (label && desired_alignment > 4 && !TARGET_64BIT)
11337 emit_label (label);
11338 LABEL_NUSES (label) = 1;
11342 if (!TARGET_SINGLE_STRINGOP)
11343 emit_insn (gen_cld ());
11346 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11348 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
11349 destreg, countreg2));
11353 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
11354 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
11355 destreg, countreg2));
11359 emit_label (label);
11360 LABEL_NUSES (label) = 1;
11363 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11364 emit_insn (gen_strsetsi (destreg,
11365 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11366 if (TARGET_64BIT && (align <= 4 || count == 0))
11368 rtx label = ix86_expand_aligntest (countreg, 4);
11369 emit_insn (gen_strsetsi (destreg,
11370 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11371 emit_label (label);
11372 LABEL_NUSES (label) = 1;
11374 if (align > 2 && count != 0 && (count & 2))
11375 emit_insn (gen_strsethi (destreg,
11376 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11377 if (align <= 2 || count == 0)
11379 rtx label = ix86_expand_aligntest (countreg, 2);
11380 emit_insn (gen_strsethi (destreg,
11381 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11382 emit_label (label);
11383 LABEL_NUSES (label) = 1;
11385 if (align > 1 && count != 0 && (count & 1))
11386 emit_insn (gen_strsetqi (destreg,
11387 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11388 if (align <= 1 || count == 0)
11390 rtx label = ix86_expand_aligntest (countreg, 1);
11391 emit_insn (gen_strsetqi (destreg,
11392 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11393 emit_label (label);
11394 LABEL_NUSES (label) = 1;
11399 /* Expand strlen. */
11401 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
11403 rtx addr, scratch1, scratch2, scratch3, scratch4;
11405 /* The generic case of the strlen expander is long. Avoid expanding it
11406 unless TARGET_INLINE_ALL_STRINGOPS. */
11408 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11409 && !TARGET_INLINE_ALL_STRINGOPS
11411 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11414 addr = force_reg (Pmode, XEXP (src, 0));
11415 scratch1 = gen_reg_rtx (Pmode);
11417 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11420 /* Well it seems that some optimizer does not combine a call like
11421 foo(strlen(bar), strlen(bar));
11422 when the move and the subtraction are done here. It calculates
11423 the length just once when these instructions are done inside of
11424 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
11425 often used and I use one fewer register for the lifetime of
11426 output_strlen_unroll() this is better. */
11428 emit_move_insn (out, addr);
11430 ix86_expand_strlensi_unroll_1 (out, align);
11432 /* strlensi_unroll_1 returns the address of the zero at the end of
11433 the string, like memchr(), so compute the length by subtracting
11434 the start address. */
11436 emit_insn (gen_subdi3 (out, out, addr));
11438 emit_insn (gen_subsi3 (out, out, addr));
11442 scratch2 = gen_reg_rtx (Pmode);
11443 scratch3 = gen_reg_rtx (Pmode);
11444 scratch4 = force_reg (Pmode, constm1_rtx);
11446 emit_move_insn (scratch3, addr);
11447 eoschar = force_reg (QImode, eoschar);
11449 emit_insn (gen_cld ());
11452 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
11453 align, scratch4, scratch3));
11454 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11455 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11459 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
11460 align, scratch4, scratch3));
11461 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11462 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
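/* Why the arithmetic above yields the length (illustrative): repnz
   scasb starts the count register at -1 and decrements it once per
   byte scanned, including the terminator, so afterwards

       count = -len - 2,  hence  len = ~count - 1,

   which is the one's complement followed by the add of -1 emitted
   above.  */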
11468 /* Expand the appropriate insns for doing strlen if not just doing
11469 repnz; scasb
11471 out = result, initialized with the start address
11472 align_rtx = alignment of the address.
11473 scratch = scratch register, initialized with the start address when
11474 not aligned, otherwise undefined
11476 This is just the body. It needs the initializations mentioned above and
11477 some address computing at the end. These things are done in i386.md. */
11480 ix86_expand_strlensi_unroll_1 (rtx out, rtx align_rtx)
11484 rtx align_2_label = NULL_RTX;
11485 rtx align_3_label = NULL_RTX;
11486 rtx align_4_label = gen_label_rtx ();
11487 rtx end_0_label = gen_label_rtx ();
11489 rtx tmpreg = gen_reg_rtx (SImode);
11490 rtx scratch = gen_reg_rtx (SImode);
11494 if (GET_CODE (align_rtx) == CONST_INT)
11495 align = INTVAL (align_rtx);
11497 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11499 /* Is there a known alignment and is it less than 4? */
11502 rtx scratch1 = gen_reg_rtx (Pmode);
11503 emit_move_insn (scratch1, out);
11504 /* Is there a known alignment and is it not 2? */
11507 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11508 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11510 /* Leave just the 3 lower bits. */
11511 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11512 NULL_RTX, 0, OPTAB_WIDEN);
11514 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11515 Pmode, 1, align_4_label);
11516 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
11517 Pmode, 1, align_2_label);
11518 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
11519 Pmode, 1, align_3_label);
11523 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11524 check whether it is aligned to a 4-byte boundary.  */
11526 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
11527 NULL_RTX, 0, OPTAB_WIDEN);
11529 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11530 Pmode, 1, align_4_label);
11533 mem = gen_rtx_MEM (QImode, out);
11535 /* Now compare the bytes. */
11537 /* Compare the first n unaligned bytes on a byte-by-byte basis.  */
11538 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11539 QImode, 1, end_0_label);
11541 /* Increment the address. */
11543 emit_insn (gen_adddi3 (out, out, const1_rtx));
11545 emit_insn (gen_addsi3 (out, out, const1_rtx));
11547 /* Not needed with an alignment of 2 */
11550 emit_label (align_2_label);
11552 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11556 emit_insn (gen_adddi3 (out, out, const1_rtx));
11558 emit_insn (gen_addsi3 (out, out, const1_rtx));
11560 emit_label (align_3_label);
11563 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11567 emit_insn (gen_adddi3 (out, out, const1_rtx));
11569 emit_insn (gen_addsi3 (out, out, const1_rtx));
11572 /* Generate a loop to check 4 bytes at a time.  It is not a good idea
11573 to align this loop: doing so only enlarges the code and does not
11574 make it run faster.  */
11575 emit_label (align_4_label);
11577 mem = gen_rtx_MEM (SImode, out);
11578 emit_move_insn (scratch, mem);
11580 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11582 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11584 /* This formula yields a nonzero result iff one of the bytes is zero.
11585 This saves three branches inside the loop and many cycles (see the
11586 C model following this function).  */
11587 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11588 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11589 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11590 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11591 gen_int_mode (0x80808080, SImode)));
11592 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11597 rtx reg = gen_reg_rtx (SImode);
11598 rtx reg2 = gen_reg_rtx (Pmode);
11599 emit_move_insn (reg, tmpreg);
11600 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11602 /* If zero is not in the first two bytes, move two bytes forward. */
11603 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11604 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11605 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11606 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11607 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11610 /* Emit lea manually to avoid clobbering of flags. */
11611 emit_insn (gen_rtx_SET (SImode, reg2,
11612 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
11614 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11615 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11616 emit_insn (gen_rtx_SET (VOIDmode, out,
11617 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11624 rtx end_2_label = gen_label_rtx ();
11625 /* Is zero in the first two bytes? */
11627 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11628 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11629 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11630 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11631 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11633 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11634 JUMP_LABEL (tmp) = end_2_label;
11636 /* Not in the first two. Move two bytes forward. */
11637 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11639 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
11641 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
11643 emit_label (end_2_label);
11647 /* Avoid a branch while fixing up the final byte (see the C model following this function).  */
11648 tmpreg = gen_lowpart (QImode, tmpreg);
11649 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11650 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11652 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11654 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
11656 emit_label (end_0_label);
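/* Two illustrative C models of the tricks used above (sketches, not GCC
   code).  The first is the classic zero-byte test: (v - 0x01010101) &
   ~v & 0x80808080 is nonzero iff some byte of V is zero, which lets the
   loop test four bytes per iteration with a single branch.  The second
   models the branch-free fixup: doubling the low byte of the mask moves
   its bit 7 into the carry flag, and subtracting 3 plus that carry (the
   sbb-style subtract) backs OUT up to the exact address of the zero.  */

static int
has_zero_byte_model (unsigned int v)
{
  return ((v - 0x01010101u) & ~v & 0x80808080u) != 0;
}

static unsigned long
fixup_model (unsigned long out, unsigned int mask)
{
  unsigned long carry = (mask >> 7) & 1;  /* set iff first byte is zero */
  return out - 3 - carry;                 /* cf. gen_subsi3_carry above */
}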
11660 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
11661 rtx callarg2 ATTRIBUTE_UNUSED,
11662 rtx pop, int sibcall)
11664 rtx use = NULL, call;
11666 if (pop == const0_rtx)
11668 if (TARGET_64BIT && pop)
11671 #if TARGET_MACHO
11672 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11673 fnaddr = machopic_indirect_call_target (fnaddr);
11674 #else
11675 /* Static functions and indirect calls don't need the pic register.  */
11676 if (! TARGET_64BIT && flag_pic
11677 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11678 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
11679 use_reg (&use, pic_offset_table_rtx);
11681 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11683 rtx al = gen_rtx_REG (QImode, 0);
11684 emit_move_insn (al, callarg2);
11685 use_reg (&use, al);
11687 #endif /* TARGET_MACHO */
11689 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11691 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11692 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11694 if (sibcall && TARGET_64BIT
11695 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11698 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11699 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
11700 emit_move_insn (fnaddr, addr);
11701 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11704 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11706 call = gen_rtx_SET (VOIDmode, retval, call);
11709 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11710 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11711 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11714 call = emit_call_insn (call);
11716 CALL_INSN_FUNCTION_USAGE (call) = use;
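/* For reference: when POP is nonzero (callee-pop conventions such as
   stdcall), the stack adjustment is folded into the call by the
   PARALLEL built above, so no separate stack-pointer update needs to be
   emitted after the call insn.  */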
11720 /* Clear stack slot assignments remembered from previous functions.
11721 This is called from INIT_EXPANDERS once before RTL is emitted for each
11722 function.  */
11724 static struct machine_function *
11725 ix86_init_machine_status (void)
11727 struct machine_function *f;
11729 f = ggc_alloc_cleared (sizeof (struct machine_function));
11730 f->use_fast_prologue_epilogue_nregs = -1;
11735 /* Return a MEM corresponding to a stack slot with mode MODE.
11736 Allocate a new slot if necessary.
11738 The RTL for a function can have several slots available: N is
11739 which slot to use. */
11742 assign_386_stack_local (enum machine_mode mode, int n)
11744 struct stack_local_entry *s;
11746 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11749 for (s = ix86_stack_locals; s; s = s->next)
11750 if (s->mode == mode && s->n == n)
11753 s = (struct stack_local_entry *)
11754 ggc_alloc (sizeof (struct stack_local_entry));
11757 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11759 s->next = ix86_stack_locals;
11760 ix86_stack_locals = s;
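/* An illustrative C model of the slot cache above -- a sketch, not GCC
   code.  Slots are memoized in a singly linked list keyed by (mode, n);
   a miss allocates a fresh slot through the caller-supplied MAKE_SLOT
   callback (standing in for assign_stack_local) and prepends it.  */

#include <stdlib.h>

struct slot_model { int mode, n; void *rtl; struct slot_model *next; };
static struct slot_model *slot_list;

static void *
get_slot_model (int mode, int n, void *(*make_slot) (int))
{
  struct slot_model *s;

  for (s = slot_list; s; s = s->next)
    if (s->mode == mode && s->n == n)
      return s->rtl;

  s = malloc (sizeof *s);
  s->mode = mode;
  s->n = n;
  s->rtl = make_slot (mode);
  s->next = slot_list;
  slot_list = s;
  return s->rtl;
}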
11764 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11766 static GTY(()) rtx ix86_tls_symbol;
11768 ix86_tls_get_addr (void)
11771 if (!ix86_tls_symbol)
11773 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11774 (TARGET_GNU_TLS && !TARGET_64BIT)
11775 ? "___tls_get_addr"
11776 : "__tls_get_addr");
11779 return ix86_tls_symbol;
11782 /* Calculate the length of the memory address in the instruction
11783 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11786 memory_address_length (rtx addr)
11788 struct ix86_address parts;
11789 rtx base, index, disp;
11792 if (GET_CODE (addr) == PRE_DEC
11793 || GET_CODE (addr) == POST_INC
11794 || GET_CODE (addr) == PRE_MODIFY
11795 || GET_CODE (addr) == POST_MODIFY)
11798 if (! ix86_decompose_address (addr, &parts))
11802 index = parts.index;
11806 /* Rule of thumb:
11807 - esp as the base always wants an index,
11808 - ebp as the base always wants a displacement.  */
11810 /* Register Indirect. */
11811 if (base && !index && !disp)
11813 /* esp (for its index) and ebp (for its displacement) need
11814 the two-byte modrm form. */
11815 if (addr == stack_pointer_rtx
11816 || addr == arg_pointer_rtx
11817 || addr == frame_pointer_rtx
11818 || addr == hard_frame_pointer_rtx)
11822 /* Direct Addressing. */
11823 else if (disp && !base && !index)
11828 /* Find the length of the displacement constant. */
11831 if (GET_CODE (disp) == CONST_INT
11832 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
11838 /* ebp always wants a displacement. */
11839 else if (base == hard_frame_pointer_rtx)
11842 /* An index requires the two-byte modrm form.... */
11844 /* ...like esp, which always wants an index. */
11845 || base == stack_pointer_rtx
11846 || base == arg_pointer_rtx
11847 || base == frame_pointer_rtx)
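/* An illustrative C model of the length computation above, covering only
   the common cases (a sketch, not GCC code): an absolute address is a
   4-byte displacement; otherwise an index (or esp as base) costs one SIB
   byte, and a displacement (forced for ebp even when zero) costs 1 byte
   if it fits in a signed 8-bit immediate, else 4.  */

static int
addr_length_model (int has_base, int base_is_esp, int base_is_ebp,
                   int has_index, int has_disp, long disp)
{
  int len = 0;

  if (!has_base && !has_index)
    return has_disp ? 4 : 0;                    /* direct addressing */
  if (has_index || base_is_esp)
    len += 1;                                   /* SIB byte */
  if (has_disp || base_is_ebp)
    len += (disp >= -128 && disp <= 127) ? 1 : 4;
  return len;
}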
11854 /* Compute default value for "length_immediate" attribute.  When SHORTFORM
11855 is set, expect that the insn has an 8-bit immediate alternative.  */
11857 ix86_attr_length_immediate_default (rtx insn, int shortform)
11861 extract_insn_cached (insn);
11862 for (i = recog_data.n_operands - 1; i >= 0; --i)
11863 if (CONSTANT_P (recog_data.operand[i]))
11868 && GET_CODE (recog_data.operand[i]) == CONST_INT
11869 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11873 switch (get_attr_mode (insn))
11884 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values.  */
11889 fatal_insn ("unknown insn mode", insn);
11895 /* Compute default value for "length_address" attribute. */
11897 ix86_attr_length_address_default (rtx insn)
11901 if (get_attr_type (insn) == TYPE_LEA)
11903 rtx set = PATTERN (insn);
11904 if (GET_CODE (set) == SET)
11906 else if (GET_CODE (set) == PARALLEL
11907 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
11908 set = XVECEXP (set, 0, 0);
11911 #ifdef ENABLE_CHECKING
11917 return memory_address_length (SET_SRC (set));
11920 extract_insn_cached (insn);
11921 for (i = recog_data.n_operands - 1; i >= 0; --i)
11922 if (GET_CODE (recog_data.operand[i]) == MEM)
11924 return memory_address_length (XEXP (recog_data.operand[i], 0));
11930 /* Return the maximum number of instructions a cpu can issue. */
11933 ix86_issue_rate (void)
11937 case PROCESSOR_PENTIUM:
11941 case PROCESSOR_PENTIUMPRO:
11942 case PROCESSOR_PENTIUM4:
11943 case PROCESSOR_ATHLON:
11952 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
11953 by DEP_INSN and no other register set by DEP_INSN.  */
11956 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
11960 /* Simplify the test for uninteresting insns. */
11961 if (insn_type != TYPE_SETCC
11962 && insn_type != TYPE_ICMOV
11963 && insn_type != TYPE_FCMOV
11964 && insn_type != TYPE_IBR)
11967 if ((set = single_set (dep_insn)) != 0)
11969 set = SET_DEST (set);
11972 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
11973 && XVECLEN (PATTERN (dep_insn), 0) == 2
11974 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
11975 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
11977 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11978 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
11983 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11986 /* This test is true if the dependent insn reads the flags but
11987 not any other potentially set register. */
11988 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11991 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11997 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11998 address with operands set by DEP_INSN. */
12001 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12005 if (insn_type == TYPE_LEA
12008 addr = PATTERN (insn);
12009 if (GET_CODE (addr) == SET)
12011 else if (GET_CODE (addr) == PARALLEL
12012 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
12013 addr = XVECEXP (addr, 0, 0);
12016 addr = SET_SRC (addr);
12021 extract_insn_cached (insn);
12022 for (i = recog_data.n_operands - 1; i >= 0; --i)
12023 if (GET_CODE (recog_data.operand[i]) == MEM)
12025 addr = XEXP (recog_data.operand[i], 0);
12032 return modified_in_p (addr, dep_insn);
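/* For example (illustrative): on the Pentium, the sequence
       addl %ebx, %eax
       movl (%eax), %ecx
   hits an address generation interlock, because the load's address uses
   a register written by the immediately preceding insn; this predicate
   lets ix86_adjust_cost charge the extra cycle for such pairs.  */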
12036 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
12038 enum attr_type insn_type, dep_insn_type;
12039 enum attr_memory memory, dep_memory;
12041 int dep_insn_code_number;
12043 /* Anti and output dependencies have zero cost on all CPUs. */
12044 if (REG_NOTE_KIND (link) != 0)
12047 dep_insn_code_number = recog_memoized (dep_insn);
12049 /* If we can't recognize the insns, we can't really do anything. */
12050 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
12053 insn_type = get_attr_type (insn);
12054 dep_insn_type = get_attr_type (dep_insn);
12058 case PROCESSOR_PENTIUM:
12059 /* Address Generation Interlock adds a cycle of latency. */
12060 if (ix86_agi_dependant (insn, dep_insn, insn_type))
12063 /* ??? Compares pair with jump/setcc. */
12064 if (ix86_flags_dependant (insn, dep_insn, insn_type))
12067 /* Floating point stores require the value to be ready one cycle earlier.  */
12068 if (insn_type == TYPE_FMOV
12069 && get_attr_memory (insn) == MEMORY_STORE
12070 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12074 case PROCESSOR_PENTIUMPRO:
12075 memory = get_attr_memory (insn);
12076 dep_memory = get_attr_memory (dep_insn);
12078 /* Since we can't represent delayed latencies of load+operation,
12079 increase the cost here for non-imov insns. */
12080 if (dep_insn_type != TYPE_IMOV
12081 && dep_insn_type != TYPE_FMOV
12082 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
12085 /* INT->FP conversion is expensive. */
12086 if (get_attr_fp_int_src (dep_insn))
12089 /* There is one extra cycle of latency between an FP op and a store.  */
12090 if (insn_type == TYPE_FMOV
12091 && (set = single_set (dep_insn)) != NULL_RTX
12092 && (set2 = single_set (insn)) != NULL_RTX
12093 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
12094 && GET_CODE (SET_DEST (set2)) == MEM)
12097 /* Show the ability of the reorder buffer to hide the latency of a
12098 load by executing it in parallel with a previous instruction,
12099 when that instruction is not needed to compute the address.  */
12100 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12101 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12103 /* Claim moves to take one cycle, as the core can issue one load
12104 at a time and the next load can start a cycle later.  */
12105 if (dep_insn_type == TYPE_IMOV
12106 || dep_insn_type == TYPE_FMOV)
12114 memory = get_attr_memory (insn);
12115 dep_memory = get_attr_memory (dep_insn);
12116 /* The esp dependency is resolved before the instruction is really
12117 finished.  */
12118 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
12119 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
12122 /* Since we can't represent delayed latencies of load+operation,
12123 increase the cost here for non-imov insns. */
12124 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
12125 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
12127 /* INT->FP conversion is expensive. */
12128 if (get_attr_fp_int_src (dep_insn))
12131 /* Show the ability of the reorder buffer to hide the latency of a
12132 load by executing it in parallel with a previous instruction,
12133 when that instruction is not needed to compute the address.  */
12134 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12135 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12137 /* Claim moves to take one cycle, as the core can issue one load
12138 at a time and the next load can start a cycle later.  */
12139 if (dep_insn_type == TYPE_IMOV
12140 || dep_insn_type == TYPE_FMOV)
12149 case PROCESSOR_ATHLON:
12151 memory = get_attr_memory (insn);
12152 dep_memory = get_attr_memory (dep_insn);
12154 /* Show the ability of the reorder buffer to hide the latency of a
12155 load by executing it in parallel with a previous instruction,
12156 when that instruction is not needed to compute the address.  */
12157 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12158 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12160 enum attr_unit unit = get_attr_unit (insn);
12163 /* Because of the difference between the length of the integer
12164 and floating point unit pipeline preparation stages, the
12165 memory operands for floating point are cheaper.
12167 ??? For Athlon the difference is most probably 2.  */
12168 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
12171 loadcost = TARGET_ATHLON ? 2 : 0;
12173 if (cost >= loadcost)
12188 struct ppro_sched_data
12191 int issued_this_cycle;
12195 static enum attr_ppro_uops
12196 ix86_safe_ppro_uops (rtx insn)
12198 if (recog_memoized (insn) >= 0)
12199 return get_attr_ppro_uops (insn);
12201 return PPRO_UOPS_MANY;
12205 ix86_dump_ppro_packet (FILE *dump)
12207 if (ix86_sched_data.ppro.decode[0])
12209 fprintf (dump, "PPRO packet: %d",
12210 INSN_UID (ix86_sched_data.ppro.decode[0]));
12211 if (ix86_sched_data.ppro.decode[1])
12212 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
12213 if (ix86_sched_data.ppro.decode[2])
12214 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
12215 fputc ('\n', dump);
12219 /* We're beginning a new block. Initialize data structures as necessary. */
12222 ix86_sched_init (FILE *dump ATTRIBUTE_UNUSED,
12223 int sched_verbose ATTRIBUTE_UNUSED,
12224 int veclen ATTRIBUTE_UNUSED)
12226 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
12229 /* Shift INSN to SLOT, and shift everything else down. */
12232 ix86_reorder_insn (rtx *insnp, rtx *slot)
12238 insnp[0] = insnp[1];
12239 while (++insnp != slot);
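/* An illustrative C model of the rotation above (a sketch, not GCC
   code): the chosen element moves up to *SLOT while everything between
   slides down one position, preserving the relative order of the other
   ready insns.  */

static void
rotate_to_slot_model (void **src, void **slot)
{
  void *chosen = *src;

  for (; src != slot; src++)
    src[0] = src[1];
  *slot = chosen;
}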
12245 ix86_sched_reorder_ppro (rtx *ready, rtx *e_ready)
12248 enum attr_ppro_uops cur_uops;
12249 int issued_this_cycle;
12253 /* At this point .ppro.decode contains the state of the three
12254 decoders from last "cycle". That is, those insns that were
12255 actually independent. But here we're scheduling for the
12256 decoder, and we may find things that are decodable in the
12257 same cycle.  */
12259 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
12260 issued_this_cycle = 0;
12263 cur_uops = ix86_safe_ppro_uops (*insnp);
12265 /* If the decoders are empty, and we have a complex insn at the
12266 head of the priority queue, let it issue without complaint.  */
12267 if (decode[0] == NULL)
12269 if (cur_uops == PPRO_UOPS_MANY)
12271 decode[0] = *insnp;
12275 /* Otherwise, search for a 2-4 uop insn to issue.  */
12276 while (cur_uops != PPRO_UOPS_FEW)
12278 if (insnp == ready)
12280 cur_uops = ix86_safe_ppro_uops (*--insnp);
12283 /* If so, move it to the head of the line. */
12284 if (cur_uops == PPRO_UOPS_FEW)
12285 ix86_reorder_insn (insnp, e_ready);
12287 /* Issue the head of the queue. */
12288 issued_this_cycle = 1;
12289 decode[0] = *e_ready--;
12292 /* Look for simple insns to fill in the other two slots. */
12293 for (i = 1; i < 3; ++i)
12294 if (decode[i] == NULL)
12296 if (ready > e_ready)
12300 cur_uops = ix86_safe_ppro_uops (*insnp);
12301 while (cur_uops != PPRO_UOPS_ONE)
12303 if (insnp == ready)
12305 cur_uops = ix86_safe_ppro_uops (*--insnp);
12308 /* Found one. Move it to the head of the queue and issue it. */
12309 if (cur_uops == PPRO_UOPS_ONE)
12311 ix86_reorder_insn (insnp, e_ready);
12312 decode[i] = *e_ready--;
12313 issued_this_cycle++;
12317 /* ??? Didn't find one. Ideally, here we would do a lazy split
12318 of 2-uop insns, issue one and queue the other. */
12322 if (issued_this_cycle == 0)
12323 issued_this_cycle = 1;
12324 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
12327 /* We are about to begin issuing insns for this clock cycle.
12328 Override the default sort algorithm to better slot instructions. */
12330 ix86_sched_reorder (FILE *dump ATTRIBUTE_UNUSED,
12331 int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
12332 int *n_readyp, int clock_var ATTRIBUTE_UNUSED)
12334 int n_ready = *n_readyp;
12335 rtx *e_ready = ready + n_ready - 1;
12337 /* Make sure to go ahead and initialize key items in
12338 ix86_sched_data if we are not going to bother trying to
12339 reorder the ready queue. */
12342 ix86_sched_data.ppro.issued_this_cycle = 1;
12351 case PROCESSOR_PENTIUMPRO:
12352 ix86_sched_reorder_ppro (ready, e_ready);
12357 return ix86_issue_rate ();
12360 /* We are about to issue INSN. Return the number of insns left on the
12361 ready queue that can be issued this cycle. */
12364 ix86_variable_issue (FILE *dump, int sched_verbose, rtx insn,
12365 int can_issue_more)
12371 return can_issue_more - 1;
12373 case PROCESSOR_PENTIUMPRO:
12375 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
12377 if (uops == PPRO_UOPS_MANY)
12380 ix86_dump_ppro_packet (dump);
12381 ix86_sched_data.ppro.decode[0] = insn;
12382 ix86_sched_data.ppro.decode[1] = NULL;
12383 ix86_sched_data.ppro.decode[2] = NULL;
12385 ix86_dump_ppro_packet (dump);
12386 ix86_sched_data.ppro.decode[0] = NULL;
12388 else if (uops == PPRO_UOPS_FEW)
12391 ix86_dump_ppro_packet (dump);
12392 ix86_sched_data.ppro.decode[0] = insn;
12393 ix86_sched_data.ppro.decode[1] = NULL;
12394 ix86_sched_data.ppro.decode[2] = NULL;
12398 for (i = 0; i < 3; ++i)
12399 if (ix86_sched_data.ppro.decode[i] == NULL)
12401 ix86_sched_data.ppro.decode[i] = insn;
12409 ix86_dump_ppro_packet (dump);
12410 ix86_sched_data.ppro.decode[0] = NULL;
12411 ix86_sched_data.ppro.decode[1] = NULL;
12412 ix86_sched_data.ppro.decode[2] = NULL;
12416 return --ix86_sched_data.ppro.issued_this_cycle;
12421 ia32_use_dfa_pipeline_interface (void)
12423 if (TARGET_PENTIUM || TARGET_ATHLON_K8)
12428 /* How many alternative schedules to try.  This should be as wide as the
12429 scheduling freedom in the DFA, but no wider.  Making this value too
12430 large results in extra work for the scheduler.  */
12433 ia32_multipass_dfa_lookahead (void)
12435 if (ix86_tune == PROCESSOR_PENTIUM)
12442 /* Walk through INSNS and look for MEM references whose address is DSTREG or
12443 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
12444 appropriate.  */
12447 ix86_set_move_mem_attrs (rtx insns, rtx dstref, rtx srcref, rtx dstreg,
12452 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
12454 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
12458 /* Subroutine of above to actually do the updating by recursively walking
12459 the rtx.  */
12462 ix86_set_move_mem_attrs_1 (rtx x, rtx dstref, rtx srcref, rtx dstreg,
12465 enum rtx_code code = GET_CODE (x);
12466 const char *format_ptr = GET_RTX_FORMAT (code);
12469 if (code == MEM && XEXP (x, 0) == dstreg)
12470 MEM_COPY_ATTRIBUTES (x, dstref);
12471 else if (code == MEM && XEXP (x, 0) == srcreg)
12472 MEM_COPY_ATTRIBUTES (x, srcref);
12474 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
12476 if (*format_ptr == 'e')
12477 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
12479 else if (*format_ptr == 'E')
12480 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12481 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
12486 /* Compute the alignment given to a constant that is being placed in memory.
12487 EXP is the constant and ALIGN is the alignment that the object would
12488 ordinarily have.
12489 The value of this function is used instead of that alignment to align
12490 the object.  */
12493 ix86_constant_alignment (tree exp, int align)
12495 if (TREE_CODE (exp) == REAL_CST)
12497 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12499 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12502 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
12509 /* Compute the alignment for a static variable.
12510 TYPE is the data type, and ALIGN is the alignment that
12511 the object would ordinarily have. The value of this function is used
12512 instead of that alignment to align the object. */
12515 ix86_data_alignment (tree type, int align)
12517 if (AGGREGATE_TYPE_P (type)
12518 && TYPE_SIZE (type)
12519 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12520 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12521 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12524 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
12525 to a 16-byte boundary.  */
12528 if (AGGREGATE_TYPE_P (type)
12529 && TYPE_SIZE (type)
12530 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12531 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12532 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12536 if (TREE_CODE (type) == ARRAY_TYPE)
12538 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12540 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12543 else if (TREE_CODE (type) == COMPLEX_TYPE)
12546 if (TYPE_MODE (type) == DCmode && align < 64)
12548 if (TYPE_MODE (type) == XCmode && align < 128)
12551 else if ((TREE_CODE (type) == RECORD_TYPE
12552 || TREE_CODE (type) == UNION_TYPE
12553 || TREE_CODE (type) == QUAL_UNION_TYPE)
12554 && TYPE_FIELDS (type))
12556 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12558 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12561 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12562 || TREE_CODE (type) == INTEGER_TYPE)
12564 if (TYPE_MODE (type) == DFmode && align < 64)
12566 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12573 /* Compute the alignment for a local variable.
12574 TYPE is the data type, and ALIGN is the alignment that
12575 the object would ordinarily have. The value of this function is used
12576 instead of that alignment to align the object. */
12579 ix86_local_alignment (tree type, int align)
12581 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
12582 to a 16-byte boundary.  */
12585 if (AGGREGATE_TYPE_P (type)
12586 && TYPE_SIZE (type)
12587 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12588 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12589 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12592 if (TREE_CODE (type) == ARRAY_TYPE)
12594 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12596 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12599 else if (TREE_CODE (type) == COMPLEX_TYPE)
12601 if (TYPE_MODE (type) == DCmode && align < 64)
12603 if (TYPE_MODE (type) == XCmode && align < 128)
12606 else if ((TREE_CODE (type) == RECORD_TYPE
12607 || TREE_CODE (type) == UNION_TYPE
12608 || TREE_CODE (type) == QUAL_UNION_TYPE)
12609 && TYPE_FIELDS (type))
12611 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12613 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12616 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12617 || TREE_CODE (type) == INTEGER_TYPE)
12620 if (TYPE_MODE (type) == DFmode && align < 64)
12622 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
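/* For example (illustrative): a local "double d[2]" whose default
   alignment is 32 bits falls into the ARRAY_TYPE/DFmode case above and
   is bumped to a 64-bit alignment.  */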
12628 /* Emit RTL insns to initialize the variable parts of a trampoline.
12629 FNADDR is an RTX for the address of the function's pure code.
12630 CXT is an RTX for the static chain value for the function. */
12632 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
12636 /* Compute offset from the end of the jmp to the target function. */
12637 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12638 plus_constant (tramp, 10),
12639 NULL_RTX, 1, OPTAB_DIRECT);
12640 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12641 gen_int_mode (0xb9, QImode));
12642 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12643 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12644 gen_int_mode (0xe9, QImode));
12645 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
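/* For reference, the ten bytes stored above form the 32-bit trampoline
       b9 <cxt:4>    movl $cxt, %ecx
       e9 <disp:4>   jmp  fnaddr   (rel32 from tramp + 10)
   which loads the static chain into %ecx and jumps to the target.  */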
12650 /* Try to load the address using the shorter movl instead of movabs.
12651 We may want to support movq for kernel mode, but the kernel does not
12652 use trampolines at the moment.  */
12653 if (x86_64_zero_extended_value (fnaddr))
12655 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12656 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12657 gen_int_mode (0xbb41, HImode));
12658 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12659 gen_lowpart (SImode, fnaddr));
12664 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12665 gen_int_mode (0xbb49, HImode));
12666 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12670 /* Load static chain using movabs to r10. */
12671 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12672 gen_int_mode (0xba49, HImode));
12673 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12676 /* Jump to r11.  */
12677 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12678 gen_int_mode (0xff49, HImode));
12679 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12680 gen_int_mode (0xe3, QImode));
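/* For reference, the 64-bit trampoline emitted above is (the HImode
   stores are little endian, so e.g. 0xbb41 becomes the bytes 41 bb):
       41 bb <fn:4>    movl   $fnaddr, %r11d   (zero-extended form)
   or  49 bb <fn:8>    movabs $fnaddr, %r11
       49 ba <cxt:8>   movabs $cxt, %r10
       49 ff e3        jmp    *%r11  */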
12682 if (offset > TRAMPOLINE_SIZE)
12686 #ifdef TRANSFER_FROM_TRAMPOLINE
12687 emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
12688 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12692 #define def_builtin(MASK, NAME, TYPE, CODE) \
12694 if ((MASK) & target_flags \
12695 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
12696 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12697 NULL, NULL_TREE); \
12700 struct builtin_description
12702 const unsigned int mask;
12703 const enum insn_code icode;
12704 const char *const name;
12705 const enum ix86_builtins code;
12706 const enum rtx_code comparison;
12707 const unsigned int flag;
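/* A hypothetical use of the def_builtin macro above, to show the shape
   of its arguments (the real calls appear in ix86_init_mmx_sse_builtins):

     def_builtin (MASK_SSE, "__builtin_ia32_addps",
                  v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);

   The mask test means the builtin is registered only when the matching
   ISA is enabled, and 64-bit-only builtins are skipped on 32-bit
   targets.  */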
12710 static const struct builtin_description bdesc_comi[] =
12712 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12713 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12714 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12715 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12716 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12717 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12718 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12719 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12720 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12721 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12722 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12723 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12724 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12725 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12726 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12727 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12728 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12729 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12730 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12731 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12732 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12733 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12734 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12735 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12738 static const struct builtin_description bdesc_2arg[] =
12741 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12742 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12743 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12744 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12745 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12746 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12747 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12748 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12750 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12751 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12752 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12753 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12754 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12755 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12756 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12757 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12758 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12759 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12760 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12761 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12762 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12763 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12764 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12765 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12766 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12767 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12768 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12769 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12771 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12772 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12773 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12774 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12776 { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12777 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12778 { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12779 { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12781 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12782 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12783 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12784 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12785 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12788 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12789 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12790 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12791 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
12792 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12793 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12794 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12795 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
12797 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12798 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12799 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12800 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12801 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12802 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12803 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12804 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12806 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12807 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12808 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12810 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12811 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12812 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12813 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12815 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12816 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12818 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12819 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12820 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12821 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12822 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12823 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12825 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12826 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12827 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12828 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12830 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12831 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12832 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12833 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12834 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12835 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12838 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12839 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12840 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12842 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12843 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12844 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
12846 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12847 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12848 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12849 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12850 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12851 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12853 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12854 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12855 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12856 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12857 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12858 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12860 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12861 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12862 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12863 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12865 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12866 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12869 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12870 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12871 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12872 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12873 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12874 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12875 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12876 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12878 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12879 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12880 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12881 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12882 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12883 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12884 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12885 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12886 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12887 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12888 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12889 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12890 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12891 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12892 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12893 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12894 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12895 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12896 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12897 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12899 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12900 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12901 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12902 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12904 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12905 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12906 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12907 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12909 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12910 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12911 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12914 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12915 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12916 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12917 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12918 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12919 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12920 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12921 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12923 { MASK_SSE2, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12924 { MASK_SSE2, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12925 { MASK_SSE2, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12926 { MASK_SSE2, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12927 { MASK_SSE2, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12928 { MASK_SSE2, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12929 { MASK_SSE2, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12930 { MASK_SSE2, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12932 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12933 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12934 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
12935 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
12937 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12938 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12939 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12940 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
12942 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12943 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12945 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12946 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12947 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12948 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12949 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12950 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12952 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12953 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12954 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12955 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12957 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12958 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12959 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
12960 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
12961 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12962 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12963 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12964 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
12966 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12967 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12968 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12970 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12971 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12973 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12974 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12975 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12976 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12977 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12978 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12980 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12981 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12982 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12983 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12984 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12985 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12987 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12988 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12989 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12990 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12992 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12994 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12995 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
12996 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12997 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
13000 { MASK_PNI, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
13001 { MASK_PNI, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
13002 { MASK_PNI, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
13003 { MASK_PNI, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
13004 { MASK_PNI, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
13005 { MASK_PNI, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
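/* Note on the comparison entries above: rows with a nonzero FLAG field
   (e.g. cmpgtps/cmpgeps expressed as LT/LE) are expanded with their
   operands swapped, since SSE provides only the "less" forms of the
   ordered comparisons in hardware.  */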
13008 static const struct builtin_description bdesc_1arg[] =
13010 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
13011 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
13013 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
13014 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
13015 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
13017 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
13018 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
13019 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
13020 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
13021 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
13022 { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
13024 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
13025 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
13026 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
13027 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
13029 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
13031 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
13032 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
13034 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
13035 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
13036 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
13037 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
13038 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
13040 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
13042 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
13043 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
13044 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
13045 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
13047 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
13048 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
13049 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
13051 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
13054 { MASK_PNI, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
13055 { MASK_PNI, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
13056 { MASK_PNI, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
13060 ix86_init_builtins (void)
13063 ix86_init_mmx_sse_builtins ();
13066 /* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
13067 is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
13068 builtins.  */
13070 ix86_init_mmx_sse_builtins (void)
13072 const struct builtin_description * d;
13075 tree pchar_type_node = build_pointer_type (char_type_node);
13076 tree pcchar_type_node = build_pointer_type (
13077 build_type_variant (char_type_node, 1, 0));
13078 tree pfloat_type_node = build_pointer_type (float_type_node);
13079 tree pcfloat_type_node = build_pointer_type (
13080 build_type_variant (float_type_node, 1, 0));
13081 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
13082 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
13083 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
  tree int_ftype_v4sf_v4sf
    = build_function_type_list (integer_type_node,
                                V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v4si_ftype_v4sf_v4sf
    = build_function_type_list (V4SI_type_node,
                                V4SF_type_node, V4SF_type_node, NULL_TREE);
  /* MMX/SSE/integer conversions.  */
  tree int_ftype_v4sf
    = build_function_type_list (integer_type_node,
                                V4SF_type_node, NULL_TREE);
  tree int64_ftype_v4sf
    = build_function_type_list (long_long_integer_type_node,
                                V4SF_type_node, NULL_TREE);
  tree int_ftype_v8qi
    = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_int
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, integer_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_int64
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, long_long_integer_type_node,
                                NULL_TREE);
  tree v4sf_ftype_v4sf_v2si
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V2SI_type_node, NULL_TREE);
  tree int_ftype_v4hi_int
    = build_function_type_list (integer_type_node,
                                V4HI_type_node, integer_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_int_int
    = build_function_type_list (V4HI_type_node, V4HI_type_node,
                                integer_type_node, integer_type_node,
                                NULL_TREE);
  /* Miscellaneous.  */
  tree v8qi_ftype_v4hi_v4hi
    = build_function_type_list (V8QI_type_node,
                                V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v2si_v2si
    = build_function_type_list (V4HI_type_node,
                                V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_v4sf_int
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V4SF_type_node,
                                integer_type_node, NULL_TREE);
  tree v2si_ftype_v4hi_v4hi
    = build_function_type_list (V2SI_type_node,
                                V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_int
    = build_function_type_list (V4HI_type_node,
                                V4HI_type_node, integer_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_di
    = build_function_type_list (V4HI_type_node,
                                V4HI_type_node, long_long_unsigned_type_node,
                                NULL_TREE);
  tree v2si_ftype_v2si_di
    = build_function_type_list (V2SI_type_node,
                                V2SI_type_node, long_long_unsigned_type_node,
                                NULL_TREE);
  tree void_ftype_void
    = build_function_type (void_type_node, void_list_node);
  tree void_ftype_unsigned
    = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
  tree void_ftype_unsigned_unsigned
    = build_function_type_list (void_type_node, unsigned_type_node,
                                unsigned_type_node, NULL_TREE);
  tree void_ftype_pcvoid_unsigned_unsigned
    = build_function_type_list (void_type_node, const_ptr_type_node,
                                unsigned_type_node, unsigned_type_node,
                                NULL_TREE);
  tree unsigned_ftype_void
    = build_function_type (unsigned_type_node, void_list_node);
  tree di_ftype_void
    = build_function_type (long_long_unsigned_type_node, void_list_node);
  tree v4sf_ftype_void
    = build_function_type (V4SF_type_node, void_list_node);
  tree v2si_ftype_v4sf
    = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
  /* Loads/stores.  */
  tree void_ftype_v8qi_v8qi_pchar
    = build_function_type_list (void_type_node,
                                V8QI_type_node, V8QI_type_node,
                                pchar_type_node, NULL_TREE);
  tree v4sf_ftype_pcfloat
    = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
  /* @@@ the type is bogus */
  tree v4sf_ftype_v4sf_pv2si
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, pv2si_type_node, NULL_TREE);
  tree void_ftype_pv2si_v4sf
    = build_function_type_list (void_type_node,
                                pv2si_type_node, V4SF_type_node, NULL_TREE);
  tree void_ftype_pfloat_v4sf
    = build_function_type_list (void_type_node,
                                pfloat_type_node, V4SF_type_node, NULL_TREE);
  tree void_ftype_pdi_di
    = build_function_type_list (void_type_node,
                                pdi_type_node, long_long_unsigned_type_node,
                                NULL_TREE);
  tree void_ftype_pv2di_v2di
    = build_function_type_list (void_type_node,
                                pv2di_type_node, V2DI_type_node, NULL_TREE);
  /* Normal vector unops.  */
  tree v4sf_ftype_v4sf
    = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
  /* Normal vector binops.  */
  tree v4sf_ftype_v4sf_v4sf
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v8qi_ftype_v8qi_v8qi
    = build_function_type_list (V8QI_type_node,
                                V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_v4hi
    = build_function_type_list (V4HI_type_node,
                                V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v2si_ftype_v2si_v2si
    = build_function_type_list (V2SI_type_node,
                                V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree di_ftype_di_di
    = build_function_type_list (long_long_unsigned_type_node,
                                long_long_unsigned_type_node,
                                long_long_unsigned_type_node, NULL_TREE);

  tree v2si_ftype_v2sf
    = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
  tree v2sf_ftype_v2si
    = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
  tree v2si_ftype_v2si
    = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v2sf_ftype_v2sf
    = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree v2sf_ftype_v2sf_v2sf
    = build_function_type_list (V2SF_type_node,
                                V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree v2si_ftype_v2sf_v2sf
    = build_function_type_list (V2SI_type_node,
                                V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree pint_type_node = build_pointer_type (integer_type_node);
  tree pcint_type_node = build_pointer_type (
    build_type_variant (integer_type_node, 1, 0));
  tree pdouble_type_node = build_pointer_type (double_type_node);
  tree pcdouble_type_node = build_pointer_type (
    build_type_variant (double_type_node, 1, 0));
  tree int_ftype_v2df_v2df
    = build_function_type_list (integer_type_node,
                                V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree ti_ftype_void
    = build_function_type (intTI_type_node, void_list_node);
  tree v2di_ftype_void
    = build_function_type (V2DI_type_node, void_list_node);
  tree ti_ftype_ti_ti
    = build_function_type_list (intTI_type_node,
                                intTI_type_node, intTI_type_node, NULL_TREE);
  tree void_ftype_pcvoid
    = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
  tree v2di_ftype_di
    = build_function_type_list (V2DI_type_node,
                                long_long_unsigned_type_node, NULL_TREE);
  tree di_ftype_v2di
    = build_function_type_list (long_long_unsigned_type_node,
                                V2DI_type_node, NULL_TREE);
  tree v4sf_ftype_v4si
    = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
  tree v4si_ftype_v4sf
    = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
  tree v2df_ftype_v4si
    = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
  tree v4si_ftype_v2df
    = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
  tree v2si_ftype_v2df
    = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
  tree v4sf_ftype_v2df
    = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2si
    = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
  tree v2df_ftype_v4sf
    = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
  tree int_ftype_v2df
    = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
  tree int64_ftype_v2df
    = build_function_type_list (long_long_integer_type_node,
                                V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_int
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, integer_type_node, NULL_TREE);
  tree v2df_ftype_v2df_int64
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, long_long_integer_type_node,
                                NULL_TREE);
  tree v4sf_ftype_v4sf_v2df
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v4sf
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, V4SF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v2df_int
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, V2DF_type_node,
                                integer_type_node, NULL_TREE);
  tree v2df_ftype_v2df_pv2si
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, pv2si_type_node, NULL_TREE);
  tree void_ftype_pv2si_v2df
    = build_function_type_list (void_type_node,
                                pv2si_type_node, V2DF_type_node, NULL_TREE);
  tree void_ftype_pdouble_v2df
    = build_function_type_list (void_type_node,
                                pdouble_type_node, V2DF_type_node, NULL_TREE);
  tree void_ftype_pint_int
    = build_function_type_list (void_type_node,
                                pint_type_node, integer_type_node, NULL_TREE);
  tree void_ftype_v16qi_v16qi_pchar
    = build_function_type_list (void_type_node,
                                V16QI_type_node, V16QI_type_node,
                                pchar_type_node, NULL_TREE);
  tree v2df_ftype_pcdouble
    = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v2df
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v16qi_ftype_v16qi_v16qi
    = build_function_type_list (V16QI_type_node,
                                V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_v8hi
    = build_function_type_list (V8HI_type_node,
                                V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree v4si_ftype_v4si_v4si
    = build_function_type_list (V4SI_type_node,
                                V4SI_type_node, V4SI_type_node, NULL_TREE);
  tree v2di_ftype_v2di_v2di
    = build_function_type_list (V2DI_type_node,
                                V2DI_type_node, V2DI_type_node, NULL_TREE);
  tree v2di_ftype_v2df_v2df
    = build_function_type_list (V2DI_type_node,
                                V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df
    = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_double
    = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
  tree v2df_ftype_double_double
    = build_function_type_list (V2DF_type_node,
                                double_type_node, double_type_node, NULL_TREE);
  tree int_ftype_v8hi_int
    = build_function_type_list (integer_type_node,
                                V8HI_type_node, integer_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_int_int
    = build_function_type_list (V8HI_type_node,
                                V8HI_type_node, integer_type_node,
                                integer_type_node, NULL_TREE);
  tree v2di_ftype_v2di_int
    = build_function_type_list (V2DI_type_node,
                                V2DI_type_node, integer_type_node, NULL_TREE);
  tree v4si_ftype_v4si_int
    = build_function_type_list (V4SI_type_node,
                                V4SI_type_node, integer_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_int
    = build_function_type_list (V8HI_type_node,
                                V8HI_type_node, integer_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_v2di
    = build_function_type_list (V8HI_type_node,
                                V8HI_type_node, V2DI_type_node, NULL_TREE);
  tree v4si_ftype_v4si_v2di
    = build_function_type_list (V4SI_type_node,
                                V4SI_type_node, V2DI_type_node, NULL_TREE);
  tree v4si_ftype_v8hi_v8hi
    = build_function_type_list (V4SI_type_node,
                                V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree di_ftype_v8qi_v8qi
    = build_function_type_list (long_long_unsigned_type_node,
                                V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v2di_ftype_v16qi_v16qi
    = build_function_type_list (V2DI_type_node,
                                V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree int_ftype_v16qi
    = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
  tree v16qi_ftype_pcchar
    = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
  tree void_ftype_pchar_v16qi
    = build_function_type_list (void_type_node,
                                pchar_type_node, V16QI_type_node, NULL_TREE);
  tree v4si_ftype_pcint
    = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
  tree void_ftype_pcint_v4si
    = build_function_type_list (void_type_node,
                                pcint_type_node, V4SI_type_node, NULL_TREE);
  tree v2di_ftype_v2di
    = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
  tree float80_type;
  tree float128_type;

  /* The __float80 type.  */
  if (TYPE_MODE (long_double_type_node) == XFmode)
    (*lang_hooks.types.register_builtin_type) (long_double_type_node,
                                               "__float80");
  else
    {
      /* The __float80 type.  */
      float80_type = make_node (REAL_TYPE);
      TYPE_PRECISION (float80_type) = 96;
      layout_type (float80_type);
      (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
    }

  float128_type = make_node (REAL_TYPE);
  TYPE_PRECISION (float128_type) = 128;
  layout_type (float128_type);
  (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
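/* Usage sketch (illustrative only, not from the original file): once these
   registrations run, the type keywords are usable directly from C with no
   header, e.g.

     __float80 e;       (80-bit extended precision, 96 bits of storage here)
     __float128 q;      (128-bit format)

   lang_hooks.types.register_builtin_type is what makes the names visible
   to the front end.  */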
  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
         mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
        continue;
      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
        {
        case V16QImode:
          type = v16qi_ftype_v16qi_v16qi;
          break;
        case V8HImode:
          type = v8hi_ftype_v8hi_v8hi;
          break;
        case V4SImode:
          type = v4si_ftype_v4si_v4si;
          break;
        case V2DImode:
          type = v2di_ftype_v2di_v2di;
          break;
        case V2DFmode:
          type = v2df_ftype_v2df_v2df;
          break;
        case TImode:
          type = ti_ftype_ti_ti;
          break;
        case V4SFmode:
          type = v4sf_ftype_v4sf_v4sf;
          break;
        case V8QImode:
          type = v8qi_ftype_v8qi_v8qi;
          break;
        case V4HImode:
          type = v4hi_ftype_v4hi_v4hi;
          break;
        case V2SImode:
          type = v2si_ftype_v2si_v2si;
          break;
        case DImode:
          type = di_ftype_di_di;
          break;

        default:
          abort ();
        }

      /* Override for comparisons.  */
      if (d->icode == CODE_FOR_maskcmpv4sf3
          || d->icode == CODE_FOR_maskncmpv4sf3
          || d->icode == CODE_FOR_vmmaskcmpv4sf3
          || d->icode == CODE_FOR_vmmaskncmpv4sf3)
        type = v4si_ftype_v4sf_v4sf;

      if (d->icode == CODE_FOR_maskcmpv2df3
          || d->icode == CODE_FOR_maskncmpv2df3
          || d->icode == CODE_FOR_vmmaskcmpv2df3
          || d->icode == CODE_FOR_vmmaskncmpv2df3)
        type = v2di_ftype_v2df_v2df;

      def_builtin (d->mask, d->name, type, d->code);
    }
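/* Worked example of the comparison override above (added for exposition):
   __builtin_ia32_cmpeqps compares two V4SF vectors but produces an
   all-ones/all-zeros bit mask per element, so its bdesc_2arg entry
   (CODE_FOR_maskcmpv4sf3) is registered with type v4si_ftype_v4sf_v4sf
   rather than v4sf_ftype_v4sf_v4sf; the V2DF compares get the analogous
   v2di_ftype_v2df_v2df treatment.  */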
  /* Add the remaining MMX insns with somewhat more complicated types.  */
  def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
  def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
  def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
  def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
  def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);

  def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
  def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);

  def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
  def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->mask == MASK_SSE2)
      def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
    else
      def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
  def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
  def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
  def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);

  def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
  def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
  def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
  def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
  def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);

  def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
  def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
  def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
  def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);

  def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
  def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
  def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);

  def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
  def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);

  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);

  def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
  def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
  def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
  def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);

  def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);

  /* Original 3DNow!  */
  def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
  def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);

  /* 3DNow! extension as used in the Athlon CPU.  */
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);

  def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);

  def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);

  def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
  def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);

  def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);

  def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
  def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
  def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);

  def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
  def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
  def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
  def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);

  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);

  def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);

  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
  def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);

  def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
  def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
  def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);

  def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
  def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
  def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);

  def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
  def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
  def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
  def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
  def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
  def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
  def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);

  def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);

  def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);

  def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
  def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);

  def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);

  /* Prescott New Instructions.  */
  def_builtin (MASK_PNI, "__builtin_ia32_monitor",
               void_ftype_pcvoid_unsigned_unsigned,
               IX86_BUILTIN_MONITOR);
  def_builtin (MASK_PNI, "__builtin_ia32_mwait",
               void_ftype_unsigned_unsigned,
               IX86_BUILTIN_MWAIT);
  def_builtin (MASK_PNI, "__builtin_ia32_movshdup",
               v4sf_ftype_v4sf,
               IX86_BUILTIN_MOVSHDUP);
  def_builtin (MASK_PNI, "__builtin_ia32_movsldup",
               v4sf_ftype_v4sf,
               IX86_BUILTIN_MOVSLDUP);
  def_builtin (MASK_PNI, "__builtin_ia32_lddqu",
               v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
  def_builtin (MASK_PNI, "__builtin_ia32_loadddup",
               v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
  def_builtin (MASK_PNI, "__builtin_ia32_movddup",
               v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
}
/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */
static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x != const0_rtx)
    return x;
  x = gen_reg_rtx (mode);

  if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
    emit_insn (gen_mmx_clrdi (mode == DImode ? x
                              : gen_rtx_SUBREG (DImode, x, 0)));
  else
    emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
                                : gen_rtx_SUBREG (V4SFmode, x, 0),
                                CONST0_RTX (V4SFmode)));
  return x;
}
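/* Illustration (added for exposition, not part of the original file): if
   the user writes a call whose vector argument is erroneous, the front end
   may hand expand_expr an error mark and get const0_rtx back where, say, a
   V4SFmode value was expected.  safe_vector_operand then substitutes a
   freshly cleared vector register so the expanders below do not crash in
   the insn predicates.  */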
/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  /* In case the insn wants input operands in modes different from
     the result, abort.  */
  if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
      || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
    abort ();

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
     yet one of the two must not be a memory.  This is normally enforced
     by expanders, but we didn't bother to create one here.  */
  if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
    op0 = copy_to_mode_reg (mode0, op0);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
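/* Expansion sketch (illustrative only): a user-level call such as

     __v8hi sum = __builtin_ia32_paddw128 (a, b);

   is dispatched by ix86_expand_builtin below, which finds the function
   code in bdesc_2arg and lands here; the operands are forced to satisfy
   the insn predicates and a single two-operand vector pattern is emitted.
   The SImode-to-TImode fixup above handles patterns whose second operand
   is TImode while the builtin's argument arrives as an int.  */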
/* Subroutine of ix86_expand_builtin to take care of stores.  */

static rtx
ix86_expand_store_builtin (enum insn_code icode, tree arglist)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[icode].operand[1].mode;

  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (op0, op1);
  if (pat)
    emit_insn (pat);
  return 0;
}
/* Subroutine of ix86_expand_builtin to take care of unop insns.  */

static rtx
ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
                          rtx target, int do_load)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
        op0 = safe_vector_operand (op0, mode0);

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
   sqrtss, rsqrtss, rcpss.  */

static rtx
ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
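/* Note added for exposition: the scalar SSE patterns handled here take a
   second vector operand that supplies the untouched upper elements of the
   result.  Passing op1 = op0 gives the usual intrinsic semantics, e.g.
   (illustrative)

     __m128 r = __builtin_ia32_sqrtss (x);

   computes the square root in element 0 and copies elements 1-3 from x.  */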
/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
                         rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
                      rtx target)
{
  rtx pat;
  tree arg0 = TREE_VALUE (arglist);
  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
  rtx op2;
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
                          gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                          gen_rtx_fmt_ee (comparison, QImode,
                                          SET_DEST (pat),
                                          const0_rtx)));

  return SUBREG_REG (target);
}
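/* Flow sketch (illustrative only): for __builtin_ia32_comilt (a, b) the
   comiss pattern is emitted to set the flags, and the strict_low_part SET
   above then materializes the LT flag condition into the QImode low part
   of a zeroed SImode pseudo, whose enclosing register is returned.  */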
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

rtx
ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
                     enum machine_mode mode ATTRIBUTE_UNUSED,
                     int ignore ATTRIBUTE_UNUSED)
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  tree arglist = TREE_OPERAND (exp, 1);
  tree arg0, arg1, arg2;
  rtx op0, op1, op2, pat;
  enum machine_mode tmode, mode0, mode1, mode2;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  switch (fcode)
    {
    case IX86_BUILTIN_EMMS:
      emit_insn (gen_emms ());
      return 0;

    case IX86_BUILTIN_SFENCE:
      emit_insn (gen_sfence ());
      return 0;

    case IX86_BUILTIN_PEXTRW:
    case IX86_BUILTIN_PEXTRW128:
      icode = (fcode == IX86_BUILTIN_PEXTRW
               ? CODE_FOR_mmx_pextrw
               : CODE_FOR_sse2_pextrw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        {
          error ("selector must be an integer constant in the range 0..%i",
                 fcode == IX86_BUILTIN_PEXTRW ? 3:7);
          return gen_reg_rtx (tmode);
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PINSRW:
    case IX86_BUILTIN_PINSRW128:
      icode = (fcode == IX86_BUILTIN_PINSRW
               ? CODE_FOR_mmx_pinsrw
               : CODE_FOR_sse2_pinsrw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
        {
          error ("selector must be an integer constant in the range 0..%i",
                 fcode == IX86_BUILTIN_PINSRW ? 15:255);
          return const0_rtx;
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_MASKMOVQ:
    case IX86_BUILTIN_MASKMOVDQU:
      icode = (fcode == IX86_BUILTIN_MASKMOVQ
               ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
               : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
                  : CODE_FOR_sse2_maskmovdqu));
      /* Note the arg order is different from the operand order.  */
      arg1 = TREE_VALUE (arglist);
      arg2 = TREE_VALUE (TREE_CHAIN (arglist));
      arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
        op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_SQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RSQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RCPSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);

    case IX86_BUILTIN_LOADAPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);

    case IX86_BUILTIN_LOADUPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);

    case IX86_BUILTIN_STOREAPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);

    case IX86_BUILTIN_STOREUPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);

    case IX86_BUILTIN_LOADSS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);

    case IX86_BUILTIN_STORESS:
      return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);

    case IX86_BUILTIN_LOADHPS:
    case IX86_BUILTIN_LOADLPS:
    case IX86_BUILTIN_LOADHPD:
    case IX86_BUILTIN_LOADLPD:
      icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
               : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
               : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
               : CODE_FOR_sse2_movsd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_STOREHPS:
    case IX86_BUILTIN_STORELPS:
    case IX86_BUILTIN_STOREHPD:
    case IX86_BUILTIN_STORELPD:
      icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
               : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
               : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
               : CODE_FOR_sse2_movsd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);

      pat = GEN_FCN (icode) (op0, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return const0_rtx;

    case IX86_BUILTIN_MOVNTPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
    case IX86_BUILTIN_MOVNTQ:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);

    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      target = assign_386_stack_local (SImode, 0);
      emit_move_insn (target, op0);
      emit_insn (gen_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, 0);
      emit_insn (gen_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);
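/* Example (added for exposition; names from the intrinsic headers are an
   assumption here): the xmmintrin.h wrappers _mm_setcsr and _mm_getcsr
   reach the two cases above.  The ldmxcsr/stmxcsr insns only accept
   memory operands, hence the bounce through a stack slot, e.g.

     unsigned int csr = __builtin_ia32_stmxcsr ();
     __builtin_ia32_ldmxcsr (csr | 0x8040);   (set FTZ/DAZ, illustrative)
*/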

    case IX86_BUILTIN_SHUFPS:
    case IX86_BUILTIN_SHUFPD:
      icode = (fcode == IX86_BUILTIN_SHUFPS
               ? CODE_FOR_sse_shufps
               : CODE_FOR_sse2_shufpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
        {
          /* @@@ better error message */
          error ("mask must be an immediate");
          return gen_reg_rtx (tmode);
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSHUFW:
    case IX86_BUILTIN_PSHUFD:
    case IX86_BUILTIN_PSHUFHW:
    case IX86_BUILTIN_PSHUFLW:
      icode = (  fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
               : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
               : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
               : CODE_FOR_mmx_pshufw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
        op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
        {
          /* @@@ better error message */
          error ("mask must be an immediate");
          return const0_rtx;
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSLLDQI128:
    case IX86_BUILTIN_PSRLDQI128:
      icode = (  fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
               : CODE_FOR_sse2_lshrti3);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
        {
          op0 = copy_to_reg (op0);
          op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
        }
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
        {
          error ("shift must be an immediate");
          return const0_rtx;
        }
      target = gen_reg_rtx (V2DImode);
      pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;
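/* Usage note (illustrative, not from the original file): the shift count
   for these whole-register byte shifts is expressed in bits here, so the
   emmintrin.h wrapper _mm_slli_si128 (x, n) invokes
   __builtin_ia32_pslldqi128 (x, (n) * 8); the immediate requirement is
   what the error above enforces.  */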

    case IX86_BUILTIN_FEMMS:
      emit_insn (gen_femms ());
      return 0;

    case IX86_BUILTIN_PAVGUSB:
      return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);

    case IX86_BUILTIN_PF2ID:
      return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);

    case IX86_BUILTIN_PFACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);

    case IX86_BUILTIN_PFADD:
      return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPEQ:
      return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGE:
      return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGT:
      return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);

    case IX86_BUILTIN_PFMAX:
      return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);

    case IX86_BUILTIN_PFMIN:
      return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);

    case IX86_BUILTIN_PFMUL:
      return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);

    case IX86_BUILTIN_PFRCP:
      return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFRCPIT1:
      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRCPIT2:
      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQIT1:
      return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQRT:
      return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFSUB:
      return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);

    case IX86_BUILTIN_PFSUBR:
      return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);

    case IX86_BUILTIN_PI2FD:
      return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);

    case IX86_BUILTIN_PMULHRW:
      return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);

    case IX86_BUILTIN_PF2IW:
      return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);

    case IX86_BUILTIN_PFNACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);

    case IX86_BUILTIN_PFPNACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);

    case IX86_BUILTIN_PI2FW:
      return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSI:
      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSF:
      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);

    case IX86_BUILTIN_SSE_ZERO:
      target = gen_reg_rtx (V4SFmode);
      emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
      return target;

    case IX86_BUILTIN_MMX_ZERO:
      target = gen_reg_rtx (DImode);
      emit_insn (gen_mmx_clrdi (target));
      return target;

    case IX86_BUILTIN_CLRTI:
      target = gen_reg_rtx (V2DImode);
      emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
      return target;

    case IX86_BUILTIN_SQRTSD:
      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
    case IX86_BUILTIN_LOADAPD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
    case IX86_BUILTIN_LOADUPD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);

    case IX86_BUILTIN_STOREAPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
    case IX86_BUILTIN_STOREUPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);

    case IX86_BUILTIN_LOADSD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);

    case IX86_BUILTIN_STORESD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);

    case IX86_BUILTIN_SETPD1:
      target = assign_386_stack_local (DFmode, 0);
      arg0 = TREE_VALUE (arglist);
      emit_move_insn (adjust_address (target, DFmode, 0),
                      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V2DFmode);
      emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
      emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
      return op0;

    case IX86_BUILTIN_SETPD:
      target = assign_386_stack_local (V2DFmode, 0);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      emit_move_insn (adjust_address (target, DFmode, 0),
                      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      emit_move_insn (adjust_address (target, DFmode, 8),
                      expand_expr (arg1, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V2DFmode);
      emit_insn (gen_sse2_movapd (op0, target));
      return op0;

    case IX86_BUILTIN_LOADRPD:
      target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
                                         gen_reg_rtx (V2DFmode), 1);
      emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
      return target;

    case IX86_BUILTIN_LOADPD1:
      target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
                                         gen_reg_rtx (V2DFmode), 1);
      emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
      return target;
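/* Sketch of the idiom above (added for exposition): LOADPD1 implements
   _mm_load1_pd-style semantics by loading one double into the low lane
   with sse2_loadsd and then broadcasting it with shufpd selector 0, so
   both lanes of the V2DF result hold the same value; LOADRPD uses
   selector 1 to swap the two loaded lanes instead.  */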

    case IX86_BUILTIN_STOREPD1:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
    case IX86_BUILTIN_STORERPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);

    case IX86_BUILTIN_CLRPD:
      target = gen_reg_rtx (V2DFmode);
      emit_insn (gen_sse_clrv2df (target));
      return target;

    case IX86_BUILTIN_MFENCE:
      emit_insn (gen_sse2_mfence ());
      return 0;
    case IX86_BUILTIN_LFENCE:
      emit_insn (gen_sse2_lfence ());
      return 0;

    case IX86_BUILTIN_CLFLUSH:
      arg0 = TREE_VALUE (arglist);
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      icode = CODE_FOR_sse2_clflush;
      if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
        op0 = copy_to_mode_reg (Pmode, op0);

      emit_insn (gen_sse2_clflush (op0));
      return 0;

    case IX86_BUILTIN_MOVNTPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
    case IX86_BUILTIN_MOVNTDQ:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
    case IX86_BUILTIN_MOVNTI:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);

    case IX86_BUILTIN_LOADDQA:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
    case IX86_BUILTIN_LOADDQU:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
    case IX86_BUILTIN_LOADD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);

    case IX86_BUILTIN_STOREDQA:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
    case IX86_BUILTIN_STOREDQU:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
    case IX86_BUILTIN_STORED:
      return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
14490 case IX86_BUILTIN_MONITOR:
14491 arg0 = TREE_VALUE (arglist);
14492 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14493 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14494 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14495 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14496 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14498 op0 = copy_to_mode_reg (SImode, op0);
14500 op1 = copy_to_mode_reg (SImode, op1);
14502 op2 = copy_to_mode_reg (SImode, op2);
14503 emit_insn (gen_monitor (op0, op1, op2));
14506 case IX86_BUILTIN_MWAIT:
14507 arg0 = TREE_VALUE (arglist);
14508 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14509 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14510 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14512 op0 = copy_to_mode_reg (SImode, op0);
14514 op1 = copy_to_mode_reg (SImode, op1);
14515 emit_insn (gen_mwait (op0, op1));
14518 case IX86_BUILTIN_LOADDDUP:
14519 return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
14521 case IX86_BUILTIN_LDDQU:
14522 return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    if (d->code == fcode)
      {
	/* Compares are treated specially.  */
	if (d->icode == CODE_FOR_maskcmpv4sf3
	    || d->icode == CODE_FOR_vmmaskcmpv4sf3
	    || d->icode == CODE_FOR_maskncmpv4sf3
	    || d->icode == CODE_FOR_vmmaskncmpv4sf3
	    || d->icode == CODE_FOR_maskcmpv2df3
	    || d->icode == CODE_FOR_vmmaskcmpv2df3
	    || d->icode == CODE_FOR_maskncmpv2df3
	    || d->icode == CODE_FOR_vmmaskncmpv2df3)
	  return ix86_expand_sse_compare (d, arglist, target);

	return ix86_expand_binop_builtin (d->icode, arglist, target);
      }

  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    if (d->code == fcode)
      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);

  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, arglist, target);

  /* @@@ Should really do something sensible here.  */
  abort ();
}
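
/* Flow example (illustrative, not part of the original sources; the exact
   mapping may differ between GCC releases): an intrinsic such as
   _mm_load_pd from emmintrin.h expands to __builtin_ia32_loadapd, which
   arrives here as fcode == IX86_BUILTIN_LOADAPD and is lowered through
   CODE_FOR_sse2_movapd.  Builtins not handled by the switch are looked up
   in the bdesc_2arg, bdesc_1arg and bdesc_comi tables before we abort.  */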
/* Store OPERAND to the memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  */
rtx
ix86_force_to_memory (enum machine_mode mode, rtx operand)
{
  rtx result;

  if (!reload_completed)
    abort ();
  if (TARGET_RED_ZONE)
    {
      result = gen_rtx_MEM (mode,
			    gen_rtx_PLUS (Pmode, stack_pointer_rtx,
					  GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (!TARGET_RED_ZONE && TARGET_64BIT)
    {
      switch (mode)
	{
	case HImode:
	case SImode:
	  operand = gen_lowpart (DImode, operand);
	  /* FALLTHRU */
	case DImode:
	  emit_insn (
	    gen_rtx_SET (VOIDmode,
			 gen_rtx_MEM (DImode,
				      gen_rtx_PRE_DEC (DImode,
						       stack_pointer_rtx)),
			 operand));
	  break;
	default:
	  abort ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      switch (mode)
	{
	case DImode:
	  {
	    rtx operands[2];
	    split_di (&operand, 1, operands, operands + 1);
	    emit_insn (
	      gen_rtx_SET (VOIDmode,
			   gen_rtx_MEM (SImode,
					gen_rtx_PRE_DEC (Pmode,
							 stack_pointer_rtx)),
			   operands[1]));
	    emit_insn (
	      gen_rtx_SET (VOIDmode,
			   gen_rtx_MEM (SImode,
					gen_rtx_PRE_DEC (Pmode,
							 stack_pointer_rtx)),
			   operands[0]));
	  }
	  break;
	case HImode:
	  /* It is better to store HImodes as SImodes.  */
	  if (!TARGET_PARTIAL_REG_STALL)
	    operand = gen_lowpart (SImode, operand);
	  /* FALLTHRU */
	case SImode:
	  emit_insn (
	    gen_rtx_SET (VOIDmode,
			 gen_rtx_MEM (GET_MODE (operand),
				      gen_rtx_PRE_DEC (SImode,
						       stack_pointer_rtx)),
			 operand));
	  break;
	default:
	  abort ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}
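
/* Example (illustrative, not part of the original sources): on x86-64
   with the red zone available, forcing a DImode register to memory emits
   a single move such as

	movq	%rdx, -128(%rsp)

   and returns that slot as the result MEM; the 128-byte red zone below
   the stack pointer can be used without adjusting %rsp.  */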
/* Free operand from the memory.  */
void
ix86_free_from_memory (enum machine_mode mode)
{
  if (!TARGET_RED_ZONE)
    {
      int size;

      if (mode == DImode || TARGET_64BIT)
	size = 8;
      else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
	size = 2;
      else
	size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be converted
	 to a pop or add instruction if registers are available.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
					    GEN_INT (size))));
    }
}
/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.  */
enum reg_class
ix86_preferred_reload_class (rtx x, enum reg_class class)
{
  if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
    return NO_REGS;
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* SSE can't load any constant directly yet.  */
      if (SSE_CLASS_P (class))
	return NO_REGS;
      /* Floats can load 0 and 1.  */
      if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
	{
	  /* Limit class to non-SSE.  Use GENERAL_REGS if possible.  */
	  if (MAYBE_SSE_CLASS_P (class))
	    return (reg_class_subset_p (class, GENERAL_REGS)
		    ? GENERAL_REGS : FLOAT_REGS);
	  else
	    return class;
	}
      /* General regs can load everything.  */
      if (reg_class_subset_p (class, GENERAL_REGS))
	return GENERAL_REGS;
      /* In case we haven't resolved FLOAT or SSE yet, give up.  */
      if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
	return NO_REGS;
    }
  if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
    return NO_REGS;
  if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
    return Q_REGS;
  return class;
}
/* If we are copying between general and FP registers, we need a memory
   location.  The same is true for SSE and MMX registers.

   The macro can't work reliably when one of the CLASSES is a class
   containing registers from multiple units (SSE, MMX, integer).  We avoid
   this by never combining those units in a single alternative in the
   machine description.  Ensure that this constraint holds to avoid
   unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do
   not enforce these sanity checks.  */
int
ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
			      enum machine_mode mode, int strict)
{
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
    {
      if (strict)
	abort ();
      else
	return 1;
    }
  return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
	  || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
	       || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
	      && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
		  || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
}
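
/* Illustration (not part of the original sources): copying a DFmode value
   between GENERAL_REGS and SSE_REGS satisfies the return expression above
   (the classes differ in SSE membership, and DFmode is neither SImode nor
   64-bit DImode), so reload routes the copy through a stack slot rather
   than attempting a direct cross-unit move.  */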
/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same as
   TO; on some machines it is expensive to move between registers if they
   are not general registers.  */
int
ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
			 enum reg_class class2)
{
  /* In case we require secondary memory, compute cost of the store followed
     by load.  In order to avoid bad register allocation choices, we need
     for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */

  if (ix86_secondary_memory_needed (class1, class2, mode, 0))
    {
      int cost = 1;

      cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
		   MEMORY_MOVE_COST (mode, class1, 1));
      cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
		   MEMORY_MOVE_COST (mode, class2, 1));

      /* In case of copying from general_purpose_register we may emit
	 multiple stores followed by a single load causing memory size
	 mismatch stall.  Count this as arbitrarily high cost of 20.  */
      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
	cost += 20;

      /* In the case of FP/MMX moves, the registers actually overlap, and we
	 have to switch modes in order to treat them differently.  */
      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
	  || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
	cost += 20;

      return cost;
    }

  /* Moves between SSE/MMX and integer unit are expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    return ix86_cost->mmxsse_to_integer;
  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  return 2;
}
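
/* Worked example (illustrative, with made-up MEMORY_MOVE_COST values): if
   secondary memory is needed and both classes cost 4 to load and 4 to
   store, the move is priced at 1 + 4 + 4 = 9; a further 20 is added when
   CLASS1 spans more hard registers than CLASS2 (the store/load
   size-mismatch stall) or when an FP/MMX mode switch is involved.  */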
/* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */
int
ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
{
  /* Only the flags register can hold CCmode values, and it can hold
     nothing else.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return 0;
  if (FP_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (SSE_REGNO_P (regno))
    return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
  if (MMX_REGNO_P (regno))
    return (TARGET_MMX
	    ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
  /* We handle both integers and floats in the general purpose registers.
     In future we should be able to handle vector modes as well.  */
  if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
    return 0;
  /* Take care with QImode values - they can be in non-QI regs, but then
     they do cause partial register stalls.  */
  if (regno < 4 || mode != QImode || TARGET_64BIT)
    return 1;
  return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
}
/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Also model the increased cost of moving QImode registers in
   non-Q_REGS classes.  */
int
ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
{
  int index;

  if (FLOAT_CLASS_P (class))
    {
      switch (mode)
	{
	case SFmode: index = 0; break;
	case DFmode: index = 1; break;
	case XFmode: case TFmode: index = 2; break;
	default: return 100;
	}
      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
    }
  if (SSE_CLASS_P (class))
    {
      switch (GET_MODE_SIZE (mode))
	{
	case 4: index = 0; break;
	case 8: index = 1; break;
	case 16: index = 2; break;
	default: return 100;
	}
      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
    }
  if (MMX_CLASS_P (class))
    {
      switch (GET_MODE_SIZE (mode))
	{
	case 4: index = 0; break;
	case 8: index = 1; break;
	default: return 100;
	}
      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
    }
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      if (in)
	return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
		: ix86_cost->movzbl_load);
      else
	return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
		: ix86_cost->int_store[0] + 4);
    case 2:
      return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
    default:
      /* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
      if (mode == TFmode)
	mode = XFmode;
      return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
	      * (((int) GET_MODE_SIZE (mode)
		  + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
    }
}
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
{
  enum machine_mode mode = GET_MODE (x);

  switch (code)
    {
    case CONST_INT: case CONST: case LABEL_REF: case SYMBOL_REF:
      if (TARGET_64BIT && !x86_64_sign_extended_value (x))
	*total = 3;
      else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
	*total = 2;
      else if (flag_pic && SYMBOLIC_CONST (x)
	       && (!TARGET_64BIT
		   || (GET_CODE (x) != LABEL_REF
		       && (GET_CODE (x) != SYMBOL_REF
			   || !SYMBOL_REF_LOCAL_P (x)))))
	*total = 1;
      else
	*total = 0;
      return true;

    case CONST_DOUBLE:
      if (mode == VOIDmode)
	*total = 0;
      else
	switch (standard_80387_constant_p (x))
	  {
	  case 1: /* 0.0 */
	    *total = 1;
	    break;
	  default: /* Other constants */
	    *total = 2;
	    break;
	  case 0:
	  case -1:
	    /* Start with (MEM (SYMBOL_REF)), since that's where
	       it'll probably end up.  Add a penalty for size.  */
	    *total = (COSTS_N_INSNS (1)
		      + (flag_pic != 0 && !TARGET_64BIT)
		      + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
	    break;
	  }
      return true;

    case ZERO_EXTEND:
      /* The zero extension is often completely free on x86_64, so make
	 it as cheap as possible.  */
      if (TARGET_64BIT && mode == DImode
	  && GET_MODE (XEXP (x, 0)) == SImode)
	*total = 1;
      else if (TARGET_ZERO_EXTEND_WITH_AND)
	*total = COSTS_N_INSNS (ix86_cost->add);
      else
	*total = COSTS_N_INSNS (ix86_cost->movzx);
      return false;

    case SIGN_EXTEND:
      *total = COSTS_N_INSNS (ix86_cost->movsx);
      return false;

    case ASHIFT:
      if (GET_CODE (XEXP (x, 1)) == CONST_INT
	  && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
	{
	  HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
	  if (value == 1)
	    {
	      *total = COSTS_N_INSNS (ix86_cost->add);
	      return false;
	    }
	  if ((value == 2 || value == 3)
	      && !TARGET_DECOMPOSE_LEA
	      && ix86_cost->lea <= ix86_cost->shift_const)
	    {
	      *total = COSTS_N_INSNS (ix86_cost->lea);
	      return false;
	    }
	}
      /* FALLTHRU */

    case ROTATE: case ASHIFTRT: case LSHIFTRT: case ROTATERT:
      if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
	{
	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	    {
	      if (INTVAL (XEXP (x, 1)) > 32)
		*total = COSTS_N_INSNS (ix86_cost->shift_const + 2);
	      else
		*total = COSTS_N_INSNS (ix86_cost->shift_const * 2);
	    }
	  else
	    {
	      if (GET_CODE (XEXP (x, 1)) == AND)
		*total = COSTS_N_INSNS (ix86_cost->shift_var * 2);
	      else
		*total = COSTS_N_INSNS (ix86_cost->shift_var * 6 + 2);
	    }
	}
      else
	{
	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	    *total = COSTS_N_INSNS (ix86_cost->shift_const);
	  else
	    *total = COSTS_N_INSNS (ix86_cost->shift_var);
	}
      return false;

    case MULT:
      if (FLOAT_MODE_P (mode))
	*total = COSTS_N_INSNS (ix86_cost->fmul);
      else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	{
	  unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
	  int nbits;

	  for (nbits = 0; value != 0; value >>= 1)
	    nbits++;

	  *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
				  + nbits * ix86_cost->mult_bit);
	}
      else
	{
	  /* This is arbitrary.  */
	  *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
				  + 7 * ix86_cost->mult_bit);
	}
      return false;

    case DIV: case UDIV: case MOD: case UMOD:
      if (FLOAT_MODE_P (mode))
	*total = COSTS_N_INSNS (ix86_cost->fdiv);
      else
	*total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
      return false;

    case PLUS:
      if (FLOAT_MODE_P (mode))
	*total = COSTS_N_INSNS (ix86_cost->fadd);
      else if (!TARGET_DECOMPOSE_LEA
	       && GET_MODE_CLASS (mode) == MODE_INT
	       && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
	{
	  if (GET_CODE (XEXP (x, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	      && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
	      && CONSTANT_P (XEXP (x, 1)))
	    {
	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
	      if (val == 2 || val == 4 || val == 8)
		{
		  *total = COSTS_N_INSNS (ix86_cost->lea);
		  *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
		  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
				      outer_code);
		  *total += rtx_cost (XEXP (x, 1), outer_code);
		  return true;
		}
	    }
	  else if (GET_CODE (XEXP (x, 0)) == MULT
		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
	    {
	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
	      if (val == 2 || val == 4 || val == 8)
		{
		  *total = COSTS_N_INSNS (ix86_cost->lea);
		  *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
		  *total += rtx_cost (XEXP (x, 1), outer_code);
		  return true;
		}
	    }
	  else if (GET_CODE (XEXP (x, 0)) == PLUS)
	    {
	      *total = COSTS_N_INSNS (ix86_cost->lea);
	      *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
	      *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
	      *total += rtx_cost (XEXP (x, 1), outer_code);
	      return true;
	    }
	}
      /* FALLTHRU */

    case MINUS:
      if (FLOAT_MODE_P (mode))
	{
	  *total = COSTS_N_INSNS (ix86_cost->fadd);
	  return false;
	}
      /* FALLTHRU */

    case AND: case IOR: case XOR:
      if (!TARGET_64BIT && mode == DImode)
	{
	  *total = (COSTS_N_INSNS (ix86_cost->add) * 2
		    + (rtx_cost (XEXP (x, 0), outer_code)
		       << (GET_MODE (XEXP (x, 0)) != DImode))
		    + (rtx_cost (XEXP (x, 1), outer_code)
		       << (GET_MODE (XEXP (x, 1)) != DImode)));
	  return true;
	}
      /* FALLTHRU */

    case NEG:
      if (FLOAT_MODE_P (mode))
	{
	  *total = COSTS_N_INSNS (ix86_cost->fchs);
	  return false;
	}
      /* FALLTHRU */

    case NOT:
      if (!TARGET_64BIT && mode == DImode)
	*total = COSTS_N_INSNS (ix86_cost->add * 2);
      else
	*total = COSTS_N_INSNS (ix86_cost->add);
      return false;

    case FLOAT_EXTEND:
      if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
	*total = 0;
      return false;

    case ABS:
      if (FLOAT_MODE_P (mode))
	*total = COSTS_N_INSNS (ix86_cost->fabs);
      return false;

    case SQRT:
      if (FLOAT_MODE_P (mode))
	*total = COSTS_N_INSNS (ix86_cost->fsqrt);
      return false;

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_TP)
	*total = 0;
      return false;

    default:
      return false;
    }
}
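
/* Worked example (illustrative): an SImode multiply by the constant 17
   (binary 10001) makes the loop in the MULT case above count nbits == 5,
   so the insn is costed at mult_init[2] + 5 * mult_bit; a multiply by a
   non-constant falls back to the arbitrary estimate of 7 set bits.  */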
#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
static void
ix86_svr3_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
{
  init_section ();
  fputs ("\tpushl $", asm_out_file);
  assemble_name (asm_out_file, XSTR (symbol, 0));
  fputc ('\n', asm_out_file);
}
#endif

#if TARGET_MACHO

static int current_machopic_label_num;

/* Given a symbol name and its associated stub, write out the
   definition of the stub.  */

void
machopic_output_stub (FILE *file, const char *symb, const char *stub)
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = (*targetm.strip_name_encoding) (symb);

  length = strlen (stub);
  binder_name = alloca (length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = alloca (length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  sprintf (lazy_ptr_name, "L%d$lz", label);

  if (MACHOPIC_PURE)
    machopic_picsymbol_stub_section ();
  else
    machopic_symbol_stub_section ();

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
      fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
      fprintf (file, "\tjmp %%edx\n");
    }
  else
    fprintf (file, "\tjmp *%s\n", lazy_ptr_name);

  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
      fprintf (file, "\tpushl %%eax\n");
    }
  else
    fprintf (file, "\tpushl $%s\n", lazy_ptr_name);

  fprintf (file, "\tjmp dyld_stub_binding_helper\n");

  machopic_lazy_symbol_ptr_section ();
  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, "\t.long %s\n", binder_name);
}
#endif /* TARGET_MACHO */
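
/* Example (illustrative; exact label spellings come from the
   GEN_BINDER_NAME_FOR_STUB / GEN_SYMBOL_NAME_FOR_SYMBOL macros): for a
   symbol `_foo' under MACHOPIC_PURE the routine above emits roughly

	call	LPC$1
   LPC$1:	popl	%eax
	movl	L1$lz-LPC$1(%eax),%edx
	jmp	%edx

   inside the stub, followed by a binder that pushes the lazy pointer
   L1$lz and jumps to dyld_stub_binding_helper.  */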
/* Order the registers for register allocator.  */

void
x86_order_regs_for_local_alloc (void)
{
  int pos = 0;
  int i;

  /* First allocate the local general purpose registers.  */
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (GENERAL_REGNO_P (i) && call_used_regs[i])
      reg_alloc_order [pos++] = i;

  /* Global general purpose registers.  */
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (GENERAL_REGNO_P (i) && !call_used_regs[i])
      reg_alloc_order [pos++] = i;

  /* x87 registers come first in case we are doing FP math
     using them.  */
  if (!TARGET_SSE_MATH)
    for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
      reg_alloc_order [pos++] = i;

  /* SSE registers.  */
  for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
    reg_alloc_order [pos++] = i;
  for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
    reg_alloc_order [pos++] = i;

  /* x87 registers.  */
  if (TARGET_SSE_MATH)
    for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
      reg_alloc_order [pos++] = i;

  for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
    reg_alloc_order [pos++] = i;

  /* Initialize the rest of array as we do not allocate some registers
     at all.  */
  while (pos < FIRST_PSEUDO_REGISTER)
    reg_alloc_order [pos++] = 0;
}
#ifndef TARGET_USE_MS_BITFIELD_LAYOUT
#define TARGET_USE_MS_BITFIELD_LAYOUT 0
#endif

/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
   struct attribute_spec.handler.  */
static tree
ix86_handle_struct_attribute (tree *node, tree name,
			      tree args ATTRIBUTE_UNUSED,
			      int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree *type = NULL;
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) == TYPE_DECL)
	type = &TREE_TYPE (*node);
    }
  else
    type = node;

  if (!(type && (TREE_CODE (*type) == RECORD_TYPE
		 || TREE_CODE (*type) == UNION_TYPE)))
    {
      warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }
  else if ((is_attribute_p ("ms_struct", name)
	    && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
	   || ((is_attribute_p ("gcc_struct", name)
		&& lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
    {
      warning ("`%s' incompatible attribute ignored",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  return NULL_TREE;
}

static bool
ix86_ms_bitfield_layout_p (tree record_type)
{
  return (TARGET_USE_MS_BITFIELD_LAYOUT &&
	  !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
    || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
}
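
/* Usage example (illustrative): user code selects these layouts per type,
   e.g.

	struct __attribute__ ((ms_struct)) S { char c; int i : 24; };

   which requests the MSVC bitfield layout for S independently of the
   -mms-bitfields command-line default.  */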
/* Returns an expression indicating where the this parameter is
   located on entry to the FUNCTION.  */

static rtx
x86_this_parameter (tree function)
{
  tree type = TREE_TYPE (function);

  if (TARGET_64BIT)
    {
      int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
      return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
    }

  if (ix86_function_regparm (type, function) > 0)
    {
      tree parm;

      parm = TYPE_ARG_TYPES (type);
      /* Figure out whether or not the function has a variable number of
	 arguments.  */
      for (; parm; parm = TREE_CHAIN (parm))
	if (TREE_VALUE (parm) == void_type_node)
	  break;
      /* If not, the this parameter is in the first argument.  */
      if (parm)
	{
	  int regno = 0;
	  if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
	    regno = 2;
	  return gen_rtx_REG (SImode, regno);
	}
    }

  if (aggregate_value_p (TREE_TYPE (type), type))
    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
  else
    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
}
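
/* Illustration (not part of the original sources): for a 32-bit method
   compiled with register parameters, `this' arrives in %eax (hard
   register 0); the fastcall convention places it in %ecx (hard register
   2).  Otherwise it is the first stack argument at 4(%esp), or 8(%esp)
   when a hidden aggregate-return pointer occupies the first slot.  */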
/* Determine whether x86_output_mi_thunk can succeed.  */

static bool
x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
			 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
			 HOST_WIDE_INT vcall_offset, tree function)
{
  /* 64-bit can handle anything.  */
  if (TARGET_64BIT)
    return true;

  /* For 32-bit, everything's fine if we have one free register.  */
  if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
    return true;

  /* Need a free register for vcall_offset.  */
  if (vcall_offset)
    return false;

  /* Need a free register for GOT references.  */
  if (flag_pic && !(*targetm.binds_local_p) (function))
    return false;

  /* Otherwise ok.  */
  return true;
}
/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
		     tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset, tree function)
{
  rtx xops[3];
  rtx this = x86_this_parameter (function);
  rtx this_reg, tmp;

  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
     pull it in now and let DELTA benefit.  */
  if (REG_P (this))
    this_reg = this;
  else if (vcall_offset)
    {
      /* Put the this parameter into %eax.  */
      xops[0] = this;
      xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
    }
  else
    this_reg = NULL_RTX;

  /* Adjust the this parameter by a fixed constant.  */
  if (delta)
    {
      xops[0] = GEN_INT (delta);
      xops[1] = this_reg ? this_reg : this;
      if (TARGET_64BIT)
	{
	  if (!x86_64_general_operand (xops[0], DImode))
	    {
	      tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
	      xops[1] = tmp;
	      output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
	      xops[0] = tmp;
	      xops[1] = this;
	    }
	  output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
	}
      else
	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
    }

  /* Adjust the this parameter by a value stored in the vtable.  */
  if (vcall_offset)
    {
      if (TARGET_64BIT)
	tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
      else
	{
	  int tmp_regno = 2 /* ECX */;
	  if (lookup_attribute ("fastcall",
				TYPE_ATTRIBUTES (TREE_TYPE (function))))
	    tmp_regno = 0 /* EAX */;
	  tmp = gen_rtx_REG (SImode, tmp_regno);
	}

      xops[0] = gen_rtx_MEM (Pmode, this_reg);
      xops[1] = tmp;
      if (TARGET_64BIT)
	output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
      else
	output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);

      /* Adjust the this parameter.  */
      xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
      if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
	{
	  rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
	  xops[0] = GEN_INT (vcall_offset);
	  xops[1] = tmp2;
	  output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
	  xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
	}
      xops[1] = this_reg;
      if (TARGET_64BIT)
	output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
      else
	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
    }

  /* If necessary, drop THIS back to its stack slot.  */
  if (this_reg && this_reg != this)
    {
      xops[0] = this_reg;
      xops[1] = this;
      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
    }

  xops[0] = XEXP (DECL_RTL (function), 0);
  if (TARGET_64BIT)
    {
      if (!flag_pic || (*targetm.binds_local_p) (function))
	output_asm_insn ("jmp\t%P0", xops);
      else
	{
	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
	  tmp = gen_rtx_CONST (Pmode, tmp);
	  tmp = gen_rtx_MEM (QImode, tmp);
	  xops[0] = tmp;
	  output_asm_insn ("jmp\t%A0", xops);
	}
    }
  else
    {
      if (!flag_pic || (*targetm.binds_local_p) (function))
	output_asm_insn ("jmp\t%P0", xops);
      else
#if TARGET_MACHO
	if (TARGET_MACHO)
	  {
	    const char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
	    tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
	    tmp = gen_rtx_MEM (QImode, tmp);
	    xops[0] = tmp;
	    output_asm_insn ("jmp\t%0", xops);
	  }
	else
#endif /* TARGET_MACHO */
	{
	  tmp = gen_rtx_REG (SImode, 2 /* ECX */);
	  output_set_got (tmp);

	  xops[1] = tmp;
	  output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
	  output_asm_insn ("jmp\t{*}%1", xops);
	}
    }
}
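
/* Example (illustrative): for a 32-bit, non-PIC thunk with DELTA == -4
   and no vcall offset, the routine above prints roughly

	addl	$-4, 4(%esp)
	jmp	target

   since `this' is the first stack argument at 4(%esp) and no scratch
   register is required.  */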
static void
x86_file_start (void)
{
  default_file_start ();
  if (X86_FILE_START_VERSION_DIRECTIVE)
    fputs ("\t.version\t\"01.01\"\n", asm_out_file);
  if (X86_FILE_START_FLTUSED)
    fputs ("\t.global\t__fltused\n", asm_out_file);
  if (ix86_asm_dialect == ASM_INTEL)
    fputs ("\t.intel_syntax\n", asm_out_file);
}
int
x86_field_alignment (tree field, int computed)
{
  enum machine_mode mode;
  tree type = TREE_TYPE (field);

  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
    return computed;
  mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
		    ? get_inner_array_type (type) : type);
  if (mode == DFmode || mode == DCmode
      || GET_MODE_CLASS (mode) == MODE_INT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return MIN (32, computed);
  return computed;
}
/* Output assembler code to FILE to increment profiler label # LABELNO
   for profiling a function entry.  */
void
x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
{
  if (TARGET_64BIT)
    {
      if (flag_pic)
	{
#ifndef NO_PROFILE_COUNTERS
	  fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
#endif
	  fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
	}
      else
	{
#ifndef NO_PROFILE_COUNTERS
	  fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
#endif
	  fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
	}
    }
  else if (flag_pic)
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
	       LPREFIX, labelno, PROFILE_COUNT_REGISTER);
#endif
      fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
    }
  else
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
	       PROFILE_COUNT_REGISTER);
#endif
      fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
    }
}
/* We don't have exact information about the insn sizes, but we may assume
   quite safely that we are informed about all 1 byte insns and memory
   address sizes.  This is enough to eliminate unnecessary padding in
   the vast majority of cases.  */

static int
min_insn_size (rtx insn)
{
  int l = 0;

  if (!INSN_P (insn) || !active_insn_p (insn))
    return 0;

  /* Discard alignments we've emitted and jump instructions.  */
  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
    return 0;
  if (GET_CODE (insn) == JUMP_INSN
      && (GET_CODE (PATTERN (insn)) == ADDR_VEC
	  || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
    return 0;

  /* Important case - calls are always 5 bytes.
     It is common to have many calls in a row.  */
  if (GET_CODE (insn) == CALL_INSN
      && symbolic_reference_mentioned_p (PATTERN (insn))
      && !SIBLING_CALL_P (insn))
    return 5;
  if (get_attr_length (insn) <= 1)
    return 1;

  /* For normal instructions we may rely on the sizes of addresses
     and the presence of a symbol to require 4 bytes of encoding.
     This is not the case for jumps where references are PC relative.  */
  if (GET_CODE (insn) != JUMP_INSN)
    {
      l = get_attr_length_address (insn);
      if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
	l = 4;
    }
  if (l)
    return 1 + l;
  else
    return 2;
}
/* AMD K8 core mispredicts jumps when there are more than 3 jumps in a
   16 byte window.  */

static void
k8_avoid_jump_misspredicts (void)
{
  rtx insn, start = get_insns ();
  int nbytes = 0, njumps = 0;
  int isjump = 0;

  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of instructions in the interval including INSN and not including
     START.  When NBYTES is smaller than 16 bytes, it is possible
     that the end of START and INSN ends up in the same 16byte page.

     The smallest offset in the page INSN can start is the case where START
     ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
     We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
     */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      nbytes += min_insn_size (insn);
      if (rtl_dump_file)
	fprintf (rtl_dump_file, "Insn %i estimated to %i bytes\n",
		 INSN_UID (insn), min_insn_size (insn));
      if ((GET_CODE (insn) == JUMP_INSN
	   && GET_CODE (PATTERN (insn)) != ADDR_VEC
	   && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
	  || GET_CODE (insn) == CALL_INSN)
	njumps++;
      else
	continue;

      while (njumps > 3)
	{
	  start = NEXT_INSN (start);
	  if ((GET_CODE (start) == JUMP_INSN
	       && GET_CODE (PATTERN (start)) != ADDR_VEC
	       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
	      || GET_CODE (start) == CALL_INSN)
	    njumps--, isjump = 1;
	  else
	    isjump = 0;
	  nbytes -= min_insn_size (start);
	}
      if (njumps < 0)
	abort ();
      if (rtl_dump_file)
	fprintf (rtl_dump_file, "Interval %i to %i has %i bytes\n",
		 INSN_UID (start), INSN_UID (insn), nbytes);

      if (njumps == 3 && isjump && nbytes < 16)
	{
	  int padsize = 15 - nbytes + min_insn_size (insn);

	  if (rtl_dump_file)
	    fprintf (rtl_dump_file, "Padding insn %i by %i bytes!\n",
		     INSN_UID (insn), padsize);
	  emit_insn_before (gen_align (GEN_INT (padsize)), insn);
	}
    }
}
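
/* Worked example (illustrative): if the interval currently holds three
   jumps and nbytes == 14 when a fourth, 2-byte jump is seen, all four
   could fall into one 16-byte page, so the code above requests a pad of
   15 - 14 + 2 = 3 bytes before the jump.  */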
/* Implement machine specific optimizations.
   At the moment we implement a single transformation: AMD Athlon works
   faster when RET is not the destination of a conditional jump or directly
   preceded by another jump instruction.  We avoid the penalty by inserting
   a NOP just before the RET instructions in such cases.  */
static void
ix86_reorg (void)
{
  edge e;

  if (!TARGET_ATHLON_K8 || !optimize || optimize_size)
    return;
  for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
    {
      basic_block bb = e->src;
      rtx ret = BB_END (bb);
      rtx prev;
      bool replace = false;

      if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
	  || !maybe_hot_bb_p (bb))
	continue;
      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
	if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
	  break;
      if (prev && GET_CODE (prev) == CODE_LABEL)
	{
	  edge e1;
	  for (e1 = bb->pred; e1; e1 = e1->pred_next)
	    if (EDGE_FREQUENCY (e1) && e1->src->index >= 0
		&& !(e1->flags & EDGE_FALLTHRU))
	      replace = true;
	}
      if (!replace)
	{
	  prev = prev_active_insn (ret);
	  if (prev
	      && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
		  || GET_CODE (prev) == CALL_INSN))
	    replace = true;
	  /* Empty functions get branch mispredict even when the jump
	     destination is not visible to us.  */
	  if (!prev
	      && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
	    replace = true;
	}
      if (replace)
	{
	  emit_insn_before (gen_return_internal_long (), ret);
	  delete_insn (ret);
	}
    }
  k8_avoid_jump_misspredicts ();
}
/* Return nonzero when a QImode register that must be represented via a REX
   prefix is used.  */
bool
x86_extended_QIreg_mentioned_p (rtx insn)
{
  int i;
  extract_insn_cached (insn);
  for (i = 0; i < recog_data.n_operands; i++)
    if (REG_P (recog_data.operand[i])
	&& REGNO (recog_data.operand[i]) >= 4)
      return true;
  return false;
}

/* Return nonzero when P points to a register encoded via a REX prefix.
   Called via for_each_rtx.  */
static int
extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
{
  unsigned int regno;
  if (!REG_P (*p))
    return 0;
  regno = REGNO (*p);
  return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
}

/* Return true when INSN mentions a register that must be encoded using a
   REX prefix.  */
bool
x86_extended_reg_mentioned_p (rtx insn)
{
  return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
}
/* Generate an unsigned DImode/SImode to FP conversion.  This is the same
   code optabs would emit if we didn't have TFmode patterns.  */

void
x86_emit_floatuns (rtx operands[2])
{
  rtx neglab, donelab, i0, i1, f0, in, out;
  enum machine_mode mode, inmode;

  inmode = GET_MODE (operands[1]);
  if (inmode != SImode
      && inmode != DImode)
    abort ();

  out = operands[0];
  in = force_reg (inmode, operands[1]);
  mode = GET_MODE (out);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  i1 = gen_reg_rtx (Pmode);
  f0 = gen_reg_rtx (mode);

  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);

  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);

  i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1,
			    OPTAB_DIRECT);
  i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1,
			    OPTAB_DIRECT);
  i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
  expand_float (f0, i0, 0);
  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
}
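
/* Worked example (illustrative): converting the unsigned SImode value
   0xffffffff takes the negative branch above:
   i0 = (in >> 1) | (in & 1) = 0x7fffffff, and doubling the converted
   value (f0 + f0) yields 4294967296.0, the correctly rounded result.
   The low bit is ORed in as a sticky bit so that rounding the halved
   value matches rounding the original.  */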
/* Return true if we do not know how to pass TYPE solely in registers.  */
bool
ix86_must_pass_in_stack (enum machine_mode mode, tree type)
{
  if (default_must_pass_in_stack (mode, type))
    return true;
  return (!TARGET_64BIT && type && mode == TImode);
}
/* Initialize vector TARGET via VALS.  */
void
ix86_expand_vector_init (rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode));
  int n_elts = (GET_MODE_SIZE (mode) / elt_size);
  int i;

  for (i = n_elts - 1; i >= 0; i--)
    if (GET_CODE (XVECEXP (vals, 0, i)) != CONST_INT
	&& GET_CODE (XVECEXP (vals, 0, i)) != CONST_DOUBLE)
      break;

  /* A few special cases first...
     ... constants are best loaded from constant pool.  */
  if (i < 0)
    {
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      return;
    }

  /* ... values where only the first field is non-constant are best loaded
     from the pool and overwritten via move later.  */
  if (!i)
    {
      rtx op = simplify_gen_subreg (mode, XVECEXP (vals, 0, 0),
				    GET_MODE_INNER (mode), 0);

      op = force_reg (mode, op);
      XVECEXP (vals, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode));
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      switch (GET_MODE (target))
	{
	case V2DFmode:
	  emit_insn (gen_sse2_movsd (target, target, op));
	  break;
	case V4SFmode:
	  emit_insn (gen_sse_movss (target, target, op));
	  break;
	default:
	  break;
	}
      return;
    }

  /* And the busy sequence doing rotations.  */
  switch (GET_MODE (target))
    {
    case V2DFmode:
      {
	rtx vecop0 =
	  simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 0), DFmode, 0);
	rtx vecop1 =
	  simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 1), DFmode, 0);

	vecop0 = force_reg (V2DFmode, vecop0);
	vecop1 = force_reg (V2DFmode, vecop1);
	emit_insn (gen_sse2_unpcklpd (target, vecop0, vecop1));
      }
      break;
    case V4SFmode:
      {
	rtx vecop0 =
	  simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 0), SFmode, 0);
	rtx vecop1 =
	  simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 1), SFmode, 0);
	rtx vecop2 =
	  simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 2), SFmode, 0);
	rtx vecop3 =
	  simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 3), SFmode, 0);
	rtx tmp1 = gen_reg_rtx (V4SFmode);
	rtx tmp2 = gen_reg_rtx (V4SFmode);

	vecop0 = force_reg (V4SFmode, vecop0);
	vecop1 = force_reg (V4SFmode, vecop1);
	vecop2 = force_reg (V4SFmode, vecop2);
	vecop3 = force_reg (V4SFmode, vecop3);
	emit_insn (gen_sse_unpcklps (tmp1, vecop1, vecop3));
	emit_insn (gen_sse_unpcklps (tmp2, vecop0, vecop2));
	emit_insn (gen_sse_unpcklps (target, tmp2, tmp1));
      }
      break;
    default:
      abort ();
    }
}
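
/* Illustration (not part of the original sources): for V4SFmode inputs
   {a, b, c, d}, unpcklps interleaves the low elements of its operands,
   so tmp1 = {b, d, x, x} and tmp2 = {a, c, x, x}; the final unpcklps of
   tmp2 with tmp1 then produces {a, b, c, d} in TARGET.  */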
#include "gt-i386.h"