1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-attr.h"
43 #include "basic-block.h"
46 #include "target-def.h"
47 #include "langhooks.h"
50 #ifndef CHECK_STACK_LIMIT
51 #define CHECK_STACK_LIMIT (-1)
54 /* Return index of given mode in mult and division cost tables. */
55 #define MODE_INDEX(mode) \
56 ((mode) == QImode ? 0 \
57 : (mode) == HImode ? 1 \
58 : (mode) == SImode ? 2 \
59 : (mode) == DImode ? 3 \
62 /* Processor costs (relative to an add) */
64 struct processor_costs size_cost = { /* costs for tunning for size */
65 2, /* cost of an add instruction */
66 3, /* cost of a lea instruction */
67 2, /* variable shift costs */
68 3, /* constant shift costs */
69 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
70 0, /* cost of multiply per each bit set */
71 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
72 3, /* cost of movsx */
73 3, /* cost of movzx */
76 2, /* cost for loading QImode using movzbl */
77 {2, 2, 2}, /* cost of loading integer registers
78 in QImode, HImode and SImode.
79 Relative to reg-reg move (2). */
80 {2, 2, 2}, /* cost of storing integer registers */
81 2, /* cost of reg,reg fld/fst */
82 {2, 2, 2}, /* cost of loading fp registers
83 in SFmode, DFmode and XFmode */
84 {2, 2, 2}, /* cost of loading integer registers */
85 3, /* cost of moving MMX register */
86 {3, 3}, /* cost of loading MMX registers
87 in SImode and DImode */
88 {3, 3}, /* cost of storing MMX registers
89 in SImode and DImode */
90 3, /* cost of moving SSE register */
91 {3, 3, 3}, /* cost of loading SSE registers
92 in SImode, DImode and TImode */
93 {3, 3, 3}, /* cost of storing SSE registers
94 in SImode, DImode and TImode */
95 3, /* MMX or SSE register to integer */
96 0, /* size of prefetch block */
97 0, /* number of parallel prefetches */
99 2, /* cost of FADD and FSUB insns. */
100 2, /* cost of FMUL instruction. */
101 2, /* cost of FDIV instruction. */
102 2, /* cost of FABS instruction. */
103 2, /* cost of FCHS instruction. */
104 2, /* cost of FSQRT instruction. */
107 /* Processor costs (relative to an add) */
109 struct processor_costs i386_cost = { /* 386 specific costs */
110 1, /* cost of an add instruction */
111 1, /* cost of a lea instruction */
112 3, /* variable shift costs */
113 2, /* constant shift costs */
114 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
115 1, /* cost of multiply per each bit set */
116 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
117 3, /* cost of movsx */
118 2, /* cost of movzx */
119 15, /* "large" insn */
121 4, /* cost for loading QImode using movzbl */
122 {2, 4, 2}, /* cost of loading integer registers
123 in QImode, HImode and SImode.
124 Relative to reg-reg move (2). */
125 {2, 4, 2}, /* cost of storing integer registers */
126 2, /* cost of reg,reg fld/fst */
127 {8, 8, 8}, /* cost of loading fp registers
128 in SFmode, DFmode and XFmode */
129 {8, 8, 8}, /* cost of loading integer registers */
130 2, /* cost of moving MMX register */
131 {4, 8}, /* cost of loading MMX registers
132 in SImode and DImode */
133 {4, 8}, /* cost of storing MMX registers
134 in SImode and DImode */
135 2, /* cost of moving SSE register */
136 {4, 8, 16}, /* cost of loading SSE registers
137 in SImode, DImode and TImode */
138 {4, 8, 16}, /* cost of storing SSE registers
139 in SImode, DImode and TImode */
140 3, /* MMX or SSE register to integer */
141 0, /* size of prefetch block */
142 0, /* number of parallel prefetches */
144 23, /* cost of FADD and FSUB insns. */
145 27, /* cost of FMUL instruction. */
146 88, /* cost of FDIV instruction. */
147 22, /* cost of FABS instruction. */
148 24, /* cost of FCHS instruction. */
149 122, /* cost of FSQRT instruction. */
153 struct processor_costs i486_cost = { /* 486 specific costs */
154 1, /* cost of an add instruction */
155 1, /* cost of a lea instruction */
156 3, /* variable shift costs */
157 2, /* constant shift costs */
158 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
159 1, /* cost of multiply per each bit set */
160 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
161 3, /* cost of movsx */
162 2, /* cost of movzx */
163 15, /* "large" insn */
165 4, /* cost for loading QImode using movzbl */
166 {2, 4, 2}, /* cost of loading integer registers
167 in QImode, HImode and SImode.
168 Relative to reg-reg move (2). */
169 {2, 4, 2}, /* cost of storing integer registers */
170 2, /* cost of reg,reg fld/fst */
171 {8, 8, 8}, /* cost of loading fp registers
172 in SFmode, DFmode and XFmode */
173 {8, 8, 8}, /* cost of loading integer registers */
174 2, /* cost of moving MMX register */
175 {4, 8}, /* cost of loading MMX registers
176 in SImode and DImode */
177 {4, 8}, /* cost of storing MMX registers
178 in SImode and DImode */
179 2, /* cost of moving SSE register */
180 {4, 8, 16}, /* cost of loading SSE registers
181 in SImode, DImode and TImode */
182 {4, 8, 16}, /* cost of storing SSE registers
183 in SImode, DImode and TImode */
184 3, /* MMX or SSE register to integer */
185 0, /* size of prefetch block */
186 0, /* number of parallel prefetches */
188 8, /* cost of FADD and FSUB insns. */
189 16, /* cost of FMUL instruction. */
190 73, /* cost of FDIV instruction. */
191 3, /* cost of FABS instruction. */
192 3, /* cost of FCHS instruction. */
193 83, /* cost of FSQRT instruction. */
197 struct processor_costs pentium_cost = {
198 1, /* cost of an add instruction */
199 1, /* cost of a lea instruction */
200 4, /* variable shift costs */
201 1, /* constant shift costs */
202 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
203 0, /* cost of multiply per each bit set */
204 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
205 3, /* cost of movsx */
206 2, /* cost of movzx */
207 8, /* "large" insn */
209 6, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {2, 2, 6}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {4, 4, 6}, /* cost of loading integer registers */
218 8, /* cost of moving MMX register */
219 {8, 8}, /* cost of loading MMX registers
220 in SImode and DImode */
221 {8, 8}, /* cost of storing MMX registers
222 in SImode and DImode */
223 2, /* cost of moving SSE register */
224 {4, 8, 16}, /* cost of loading SSE registers
225 in SImode, DImode and TImode */
226 {4, 8, 16}, /* cost of storing SSE registers
227 in SImode, DImode and TImode */
228 3, /* MMX or SSE register to integer */
229 0, /* size of prefetch block */
230 0, /* number of parallel prefetches */
232 3, /* cost of FADD and FSUB insns. */
233 3, /* cost of FMUL instruction. */
234 39, /* cost of FDIV instruction. */
235 1, /* cost of FABS instruction. */
236 1, /* cost of FCHS instruction. */
237 70, /* cost of FSQRT instruction. */
241 struct processor_costs pentiumpro_cost = {
242 1, /* cost of an add instruction */
243 1, /* cost of a lea instruction */
244 1, /* variable shift costs */
245 1, /* constant shift costs */
246 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
247 0, /* cost of multiply per each bit set */
248 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
249 1, /* cost of movsx */
250 1, /* cost of movzx */
251 8, /* "large" insn */
253 2, /* cost for loading QImode using movzbl */
254 {4, 4, 4}, /* cost of loading integer registers
255 in QImode, HImode and SImode.
256 Relative to reg-reg move (2). */
257 {2, 2, 2}, /* cost of storing integer registers */
258 2, /* cost of reg,reg fld/fst */
259 {2, 2, 6}, /* cost of loading fp registers
260 in SFmode, DFmode and XFmode */
261 {4, 4, 6}, /* cost of loading integer registers */
262 2, /* cost of moving MMX register */
263 {2, 2}, /* cost of loading MMX registers
264 in SImode and DImode */
265 {2, 2}, /* cost of storing MMX registers
266 in SImode and DImode */
267 2, /* cost of moving SSE register */
268 {2, 2, 8}, /* cost of loading SSE registers
269 in SImode, DImode and TImode */
270 {2, 2, 8}, /* cost of storing SSE registers
271 in SImode, DImode and TImode */
272 3, /* MMX or SSE register to integer */
273 32, /* size of prefetch block */
274 6, /* number of parallel prefetches */
276 3, /* cost of FADD and FSUB insns. */
277 5, /* cost of FMUL instruction. */
278 56, /* cost of FDIV instruction. */
279 2, /* cost of FABS instruction. */
280 2, /* cost of FCHS instruction. */
281 56, /* cost of FSQRT instruction. */
285 struct processor_costs k6_cost = {
286 1, /* cost of an add instruction */
287 2, /* cost of a lea instruction */
288 1, /* variable shift costs */
289 1, /* constant shift costs */
290 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
291 0, /* cost of multiply per each bit set */
292 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
293 2, /* cost of movsx */
294 2, /* cost of movzx */
295 8, /* "large" insn */
297 3, /* cost for loading QImode using movzbl */
298 {4, 5, 4}, /* cost of loading integer registers
299 in QImode, HImode and SImode.
300 Relative to reg-reg move (2). */
301 {2, 3, 2}, /* cost of storing integer registers */
302 4, /* cost of reg,reg fld/fst */
303 {6, 6, 6}, /* cost of loading fp registers
304 in SFmode, DFmode and XFmode */
305 {4, 4, 4}, /* cost of loading integer registers */
306 2, /* cost of moving MMX register */
307 {2, 2}, /* cost of loading MMX registers
308 in SImode and DImode */
309 {2, 2}, /* cost of storing MMX registers
310 in SImode and DImode */
311 2, /* cost of moving SSE register */
312 {2, 2, 8}, /* cost of loading SSE registers
313 in SImode, DImode and TImode */
314 {2, 2, 8}, /* cost of storing SSE registers
315 in SImode, DImode and TImode */
316 6, /* MMX or SSE register to integer */
317 32, /* size of prefetch block */
318 1, /* number of parallel prefetches */
320 2, /* cost of FADD and FSUB insns. */
321 2, /* cost of FMUL instruction. */
322 56, /* cost of FDIV instruction. */
323 2, /* cost of FABS instruction. */
324 2, /* cost of FCHS instruction. */
325 56, /* cost of FSQRT instruction. */
329 struct processor_costs athlon_cost = {
330 1, /* cost of an add instruction */
331 2, /* cost of a lea instruction */
332 1, /* variable shift costs */
333 1, /* constant shift costs */
334 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
335 0, /* cost of multiply per each bit set */
336 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
337 1, /* cost of movsx */
338 1, /* cost of movzx */
339 8, /* "large" insn */
341 4, /* cost for loading QImode using movzbl */
342 {3, 4, 3}, /* cost of loading integer registers
343 in QImode, HImode and SImode.
344 Relative to reg-reg move (2). */
345 {3, 4, 3}, /* cost of storing integer registers */
346 4, /* cost of reg,reg fld/fst */
347 {4, 4, 12}, /* cost of loading fp registers
348 in SFmode, DFmode and XFmode */
349 {6, 6, 8}, /* cost of loading integer registers */
350 2, /* cost of moving MMX register */
351 {4, 4}, /* cost of loading MMX registers
352 in SImode and DImode */
353 {4, 4}, /* cost of storing MMX registers
354 in SImode and DImode */
355 2, /* cost of moving SSE register */
356 {4, 4, 6}, /* cost of loading SSE registers
357 in SImode, DImode and TImode */
358 {4, 4, 5}, /* cost of storing SSE registers
359 in SImode, DImode and TImode */
360 5, /* MMX or SSE register to integer */
361 64, /* size of prefetch block */
362 6, /* number of parallel prefetches */
364 4, /* cost of FADD and FSUB insns. */
365 4, /* cost of FMUL instruction. */
366 24, /* cost of FDIV instruction. */
367 2, /* cost of FABS instruction. */
368 2, /* cost of FCHS instruction. */
369 35, /* cost of FSQRT instruction. */
373 struct processor_costs k8_cost = {
374 1, /* cost of an add instruction */
375 2, /* cost of a lea instruction */
376 1, /* variable shift costs */
377 1, /* constant shift costs */
378 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
379 0, /* cost of multiply per each bit set */
380 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
381 1, /* cost of movsx */
382 1, /* cost of movzx */
383 8, /* "large" insn */
385 4, /* cost for loading QImode using movzbl */
386 {3, 4, 3}, /* cost of loading integer registers
387 in QImode, HImode and SImode.
388 Relative to reg-reg move (2). */
389 {3, 4, 3}, /* cost of storing integer registers */
390 4, /* cost of reg,reg fld/fst */
391 {4, 4, 12}, /* cost of loading fp registers
392 in SFmode, DFmode and XFmode */
393 {6, 6, 8}, /* cost of loading integer registers */
394 2, /* cost of moving MMX register */
395 {3, 3}, /* cost of loading MMX registers
396 in SImode and DImode */
397 {4, 4}, /* cost of storing MMX registers
398 in SImode and DImode */
399 2, /* cost of moving SSE register */
400 {4, 3, 6}, /* cost of loading SSE registers
401 in SImode, DImode and TImode */
402 {4, 4, 5}, /* cost of storing SSE registers
403 in SImode, DImode and TImode */
404 5, /* MMX or SSE register to integer */
405 64, /* size of prefetch block */
406 6, /* number of parallel prefetches */
408 4, /* cost of FADD and FSUB insns. */
409 4, /* cost of FMUL instruction. */
410 19, /* cost of FDIV instruction. */
411 2, /* cost of FABS instruction. */
412 2, /* cost of FCHS instruction. */
413 35, /* cost of FSQRT instruction. */
417 struct processor_costs pentium4_cost = {
418 1, /* cost of an add instruction */
419 1, /* cost of a lea instruction */
420 4, /* variable shift costs */
421 4, /* constant shift costs */
422 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
423 0, /* cost of multiply per each bit set */
424 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
425 1, /* cost of movsx */
426 1, /* cost of movzx */
427 16, /* "large" insn */
429 2, /* cost for loading QImode using movzbl */
430 {4, 5, 4}, /* cost of loading integer registers
431 in QImode, HImode and SImode.
432 Relative to reg-reg move (2). */
433 {2, 3, 2}, /* cost of storing integer registers */
434 2, /* cost of reg,reg fld/fst */
435 {2, 2, 6}, /* cost of loading fp registers
436 in SFmode, DFmode and XFmode */
437 {4, 4, 6}, /* cost of loading integer registers */
438 2, /* cost of moving MMX register */
439 {2, 2}, /* cost of loading MMX registers
440 in SImode and DImode */
441 {2, 2}, /* cost of storing MMX registers
442 in SImode and DImode */
443 12, /* cost of moving SSE register */
444 {12, 12, 12}, /* cost of loading SSE registers
445 in SImode, DImode and TImode */
446 {2, 2, 8}, /* cost of storing SSE registers
447 in SImode, DImode and TImode */
448 10, /* MMX or SSE register to integer */
449 64, /* size of prefetch block */
450 6, /* number of parallel prefetches */
452 5, /* cost of FADD and FSUB insns. */
453 7, /* cost of FMUL instruction. */
454 43, /* cost of FDIV instruction. */
455 2, /* cost of FABS instruction. */
456 2, /* cost of FCHS instruction. */
457 43, /* cost of FSQRT instruction. */
460 const struct processor_costs *ix86_cost = &pentium_cost;
462 /* Processor feature/optimization bitmasks. */
463 #define m_386 (1<<PROCESSOR_I386)
464 #define m_486 (1<<PROCESSOR_I486)
465 #define m_PENT (1<<PROCESSOR_PENTIUM)
466 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
467 #define m_K6 (1<<PROCESSOR_K6)
468 #define m_ATHLON (1<<PROCESSOR_ATHLON)
469 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
470 #define m_K8 (1<<PROCESSOR_K8)
471 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
473 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
474 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
475 const int x86_zero_extend_with_and = m_486 | m_PENT;
476 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
477 const int x86_double_with_add = ~m_386;
478 const int x86_use_bit_test = m_386;
479 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
480 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
481 const int x86_3dnow_a = m_ATHLON_K8;
482 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
483 const int x86_branch_hints = m_PENT4;
484 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
485 const int x86_partial_reg_stall = m_PPRO;
486 const int x86_use_loop = m_K6;
487 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
488 const int x86_use_mov0 = m_K6;
489 const int x86_use_cltd = ~(m_PENT | m_K6);
490 const int x86_read_modify_write = ~m_PENT;
491 const int x86_read_modify = ~(m_PENT | m_PPRO);
492 const int x86_split_long_moves = m_PPRO;
493 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
494 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
495 const int x86_single_stringop = m_386 | m_PENT4;
496 const int x86_qimode_math = ~(0);
497 const int x86_promote_qi_regs = 0;
498 const int x86_himode_math = ~(m_PPRO);
499 const int x86_promote_hi_regs = m_PPRO;
500 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
501 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
502 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
503 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
504 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
505 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
506 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
507 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
508 const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
509 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
510 const int x86_decompose_lea = m_PENT4;
511 const int x86_shift1 = ~m_486;
512 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
513 const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
514 /* Set for machines where the type and dependencies are resolved on SSE register
515 parts instead of whole registers, so we may maintain just lower part of
516 scalar values in proper format leaving the upper part undefined. */
517 const int x86_sse_partial_regs = m_ATHLON_K8;
518 /* Athlon optimizes partial-register FPS special case, thus avoiding the
519 need for extra instructions beforehand */
520 const int x86_sse_partial_regs_for_cvtsd2ss = 0;
521 const int x86_sse_typeless_stores = m_ATHLON_K8;
522 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
523 const int x86_use_ffreep = m_ATHLON_K8;
524 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
525 const int x86_inter_unit_moves = ~(m_ATHLON_K8);
526 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_PPRO;
528 /* In case the average insn count for single function invocation is
529 lower than this constant, emit fast (but longer) prologue and
531 #define FAST_PROLOGUE_INSN_COUNT 20
533 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
534 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
535 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
536 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
538 /* Array of the smallest class containing reg number REGNO, indexed by
539 REGNO. Used by REGNO_REG_CLASS in i386.h. */
541 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
544 AREG, DREG, CREG, BREG,
546 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
548 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
549 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
552 /* flags, fpsr, dirflag, frame */
553 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
554 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
556 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
558 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
559 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
560 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
564 /* The "default" register map used in 32bit mode. */
566 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
568 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
569 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
570 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
571 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
572 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
573 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
574 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
577 static int const x86_64_int_parameter_registers[6] =
579 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
580 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
583 static int const x86_64_int_return_registers[4] =
585 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
588 /* The "default" register map used in 64bit mode. */
589 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
591 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
592 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
593 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
594 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
595 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
596 8,9,10,11,12,13,14,15, /* extended integer registers */
597 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
600 /* Define the register numbers to be used in Dwarf debugging information.
601 The SVR4 reference port C compiler uses the following register numbers
602 in its Dwarf output code:
603 0 for %eax (gcc regno = 0)
604 1 for %ecx (gcc regno = 2)
605 2 for %edx (gcc regno = 1)
606 3 for %ebx (gcc regno = 3)
607 4 for %esp (gcc regno = 7)
608 5 for %ebp (gcc regno = 6)
609 6 for %esi (gcc regno = 4)
610 7 for %edi (gcc regno = 5)
611 The following three DWARF register numbers are never generated by
612 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
613 believes these numbers have these meanings.
614 8 for %eip (no gcc equivalent)
615 9 for %eflags (gcc regno = 17)
616 10 for %trapno (no gcc equivalent)
617 It is not at all clear how we should number the FP stack registers
618 for the x86 architecture. If the version of SDB on x86/svr4 were
619 a bit less brain dead with respect to floating-point then we would
620 have a precedent to follow with respect to DWARF register numbers
621 for x86 FP registers, but the SDB on x86/svr4 is so completely
622 broken with respect to FP registers that it is hardly worth thinking
623 of it as something to strive for compatibility with.
624 The version of x86/svr4 SDB I have at the moment does (partially)
625 seem to believe that DWARF register number 11 is associated with
626 the x86 register %st(0), but that's about all. Higher DWARF
627 register numbers don't seem to be associated with anything in
628 particular, and even for DWARF regno 11, SDB only seems to under-
629 stand that it should say that a variable lives in %st(0) (when
630 asked via an `=' command) if we said it was in DWARF regno 11,
631 but SDB still prints garbage when asked for the value of the
632 variable in question (via a `/' command).
633 (Also note that the labels SDB prints for various FP stack regs
634 when doing an `x' command are all wrong.)
635 Note that these problems generally don't affect the native SVR4
636 C compiler because it doesn't allow the use of -O with -g and
637 because when it is *not* optimizing, it allocates a memory
638 location for each floating-point variable, and the memory
639 location is what gets described in the DWARF AT_location
640 attribute for the variable in question.
641 Regardless of the severe mental illness of the x86/svr4 SDB, we
642 do something sensible here and we use the following DWARF
643 register numbers. Note that these are all stack-top-relative
645 11 for %st(0) (gcc regno = 8)
646 12 for %st(1) (gcc regno = 9)
647 13 for %st(2) (gcc regno = 10)
648 14 for %st(3) (gcc regno = 11)
649 15 for %st(4) (gcc regno = 12)
650 16 for %st(5) (gcc regno = 13)
651 17 for %st(6) (gcc regno = 14)
652 18 for %st(7) (gcc regno = 15)
654 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
656 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
657 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
658 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
659 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
660 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
661 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
662 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
665 /* Test and compare insns in i386.md store the information needed to
666 generate branch and scc insns here. */
668 rtx ix86_compare_op0 = NULL_RTX;
669 rtx ix86_compare_op1 = NULL_RTX;
671 #define MAX_386_STACK_LOCALS 3
672 /* Size of the register save area. */
673 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
675 /* Define the structure for the machine field in struct function. */
677 struct stack_local_entry GTY(())
682 struct stack_local_entry *next;
685 /* Structure describing stack frame layout.
686 Stack grows downward:
692 saved frame pointer if frame_pointer_needed
693 <- HARD_FRAME_POINTER
699 > to_allocate <- FRAME_POINTER
711 int outgoing_arguments_size;
714 HOST_WIDE_INT to_allocate;
715 /* The offsets relative to ARG_POINTER. */
716 HOST_WIDE_INT frame_pointer_offset;
717 HOST_WIDE_INT hard_frame_pointer_offset;
718 HOST_WIDE_INT stack_pointer_offset;
720 /* When save_regs_using_mov is set, emit prologue using
721 move instead of push instructions. */
722 bool save_regs_using_mov;
725 /* Used to enable/disable debugging features. */
726 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
727 /* Code model option as passed by user. */
728 const char *ix86_cmodel_string;
730 enum cmodel ix86_cmodel;
732 const char *ix86_asm_string;
733 enum asm_dialect ix86_asm_dialect = ASM_ATT;
735 const char *ix86_tls_dialect_string;
736 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
738 /* Which unit we are generating floating point math for. */
739 enum fpmath_unit ix86_fpmath;
741 /* Which cpu are we scheduling for. */
742 enum processor_type ix86_tune;
743 /* Which instruction set architecture to use. */
744 enum processor_type ix86_arch;
746 /* Strings to hold which cpu and instruction set architecture to use. */
747 const char *ix86_tune_string; /* for -mtune=<xxx> */
748 const char *ix86_arch_string; /* for -march=<xxx> */
749 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
751 /* # of registers to use to pass arguments. */
752 const char *ix86_regparm_string;
754 /* true if sse prefetch instruction is not NOOP. */
755 int x86_prefetch_sse;
757 /* ix86_regparm_string as a number */
760 /* Alignment to use for loops and jumps: */
762 /* Power of two alignment for loops. */
763 const char *ix86_align_loops_string;
765 /* Power of two alignment for non-loop jumps. */
766 const char *ix86_align_jumps_string;
768 /* Power of two alignment for stack boundary in bytes. */
769 const char *ix86_preferred_stack_boundary_string;
771 /* Preferred alignment for stack boundary in bits. */
772 int ix86_preferred_stack_boundary;
774 /* Values 1-5: see jump.c */
775 int ix86_branch_cost;
776 const char *ix86_branch_cost_string;
778 /* Power of two alignment for functions. */
779 const char *ix86_align_funcs_string;
781 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
782 static char internal_label_prefix[16];
783 static int internal_label_prefix_len;
785 static int local_symbolic_operand (rtx, enum machine_mode);
786 static int tls_symbolic_operand_1 (rtx, enum tls_model);
787 static void output_pic_addr_const (FILE *, rtx, int);
788 static void put_condition_code (enum rtx_code, enum machine_mode,
790 static const char *get_some_local_dynamic_name (void);
791 static int get_some_local_dynamic_name_1 (rtx *, void *);
792 static rtx maybe_get_pool_constant (rtx);
793 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
794 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
796 static rtx get_thread_pointer (int);
797 static rtx legitimize_tls_address (rtx, enum tls_model, int);
798 static void get_pc_thunk_name (char [32], unsigned int);
799 static rtx gen_push (rtx);
800 static int memory_address_length (rtx addr);
801 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
802 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
803 static enum attr_ppro_uops ix86_safe_ppro_uops (rtx);
804 static void ix86_dump_ppro_packet (FILE *);
805 static void ix86_reorder_insn (rtx *, rtx *);
806 static struct machine_function * ix86_init_machine_status (void);
807 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
808 static int ix86_nsaved_regs (void);
809 static void ix86_emit_save_regs (void);
810 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
811 static void ix86_emit_restore_regs_using_mov (rtx, int, int);
812 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
813 static void ix86_set_move_mem_attrs_1 (rtx, rtx, rtx, rtx, rtx);
814 static void ix86_sched_reorder_ppro (rtx *, rtx *);
815 static HOST_WIDE_INT ix86_GOT_alias_set (void);
816 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
817 static rtx ix86_expand_aligntest (rtx, int);
818 static void ix86_expand_strlensi_unroll_1 (rtx, rtx);
819 static int ix86_issue_rate (void);
820 static int ix86_adjust_cost (rtx, rtx, rtx, int);
821 static void ix86_sched_init (FILE *, int, int);
822 static int ix86_sched_reorder (FILE *, int, rtx *, int *, int);
823 static int ix86_variable_issue (FILE *, int, rtx, int);
824 static int ia32_use_dfa_pipeline_interface (void);
825 static int ia32_multipass_dfa_lookahead (void);
826 static void ix86_init_mmx_sse_builtins (void);
827 static rtx x86_this_parameter (tree);
828 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
829 HOST_WIDE_INT, tree);
830 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
831 static void x86_file_start (void);
832 static void ix86_reorg (void);
833 bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
837 rtx base, index, disp;
839 enum ix86_address_seg { SEG_DEFAULT, SEG_FS, SEG_GS } seg;
842 static int ix86_decompose_address (rtx, struct ix86_address *);
843 static int ix86_address_cost (rtx);
844 static bool ix86_cannot_force_const_mem (rtx);
845 static rtx ix86_delegitimize_address (rtx);
847 struct builtin_description;
848 static rtx ix86_expand_sse_comi (const struct builtin_description *,
850 static rtx ix86_expand_sse_compare (const struct builtin_description *,
852 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
853 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
854 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
855 static rtx ix86_expand_store_builtin (enum insn_code, tree);
856 static rtx safe_vector_operand (rtx, enum machine_mode);
857 static enum rtx_code ix86_fp_compare_code_to_integer (enum rtx_code);
858 static void ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *,
859 enum rtx_code *, enum rtx_code *);
860 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
861 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
862 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
863 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
864 static int ix86_fp_comparison_cost (enum rtx_code code);
865 static unsigned int ix86_select_alt_pic_regnum (void);
866 static int ix86_save_reg (unsigned int, int);
867 static void ix86_compute_frame_layout (struct ix86_frame *);
868 static int ix86_comp_type_attributes (tree, tree);
869 static int ix86_function_regparm (tree, tree);
870 const struct attribute_spec ix86_attribute_table[];
871 static bool ix86_function_ok_for_sibcall (tree, tree);
872 static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
873 static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
874 static int ix86_value_regno (enum machine_mode);
875 static bool contains_128bit_aligned_vector_p (tree);
876 static bool ix86_ms_bitfield_layout_p (tree);
877 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
878 static int extended_reg_mentioned_1 (rtx *, void *);
879 static bool ix86_rtx_costs (rtx, int, int, int *);
880 static int min_insn_size (rtx);
881 static void k8_avoid_jump_misspredicts (void);
883 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
884 static void ix86_svr3_asm_out_constructor (rtx, int);
887 /* Register class used for passing given 64bit part of the argument.
888 These represent classes as documented by the PS ABI, with the exception
889 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
890 use SF or DFmode move instead of DImode to avoid reformatting penalties.
892 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
893 whenever possible (upper half does contain padding).
895 enum x86_64_reg_class
898 X86_64_INTEGER_CLASS,
899 X86_64_INTEGERSI_CLASS,
908 static const char * const x86_64_reg_class_name[] =
909 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
911 #define MAX_CLASSES 4
912 static int classify_argument (enum machine_mode, tree,
913 enum x86_64_reg_class [MAX_CLASSES], int);
914 static int examine_argument (enum machine_mode, tree, int, int *, int *);
915 static rtx construct_container (enum machine_mode, tree, int, int, int,
917 static enum x86_64_reg_class merge_classes (enum x86_64_reg_class,
918 enum x86_64_reg_class);
920 /* Table of constants used by fldpi, fldln2, etc... */
921 static REAL_VALUE_TYPE ext_80387_constants_table [5];
922 static bool ext_80387_constants_init = 0;
923 static void init_ext_80387_constants (void);
925 /* Initialize the GCC target structure. */
926 #undef TARGET_ATTRIBUTE_TABLE
927 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
928 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
929 # undef TARGET_MERGE_DECL_ATTRIBUTES
930 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
933 #undef TARGET_COMP_TYPE_ATTRIBUTES
934 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
936 #undef TARGET_INIT_BUILTINS
937 #define TARGET_INIT_BUILTINS ix86_init_builtins
939 #undef TARGET_EXPAND_BUILTIN
940 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
942 #undef TARGET_ASM_FUNCTION_EPILOGUE
943 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
945 #undef TARGET_ASM_OPEN_PAREN
946 #define TARGET_ASM_OPEN_PAREN ""
947 #undef TARGET_ASM_CLOSE_PAREN
948 #define TARGET_ASM_CLOSE_PAREN ""
950 #undef TARGET_ASM_ALIGNED_HI_OP
951 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
952 #undef TARGET_ASM_ALIGNED_SI_OP
953 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
955 #undef TARGET_ASM_ALIGNED_DI_OP
956 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
959 #undef TARGET_ASM_UNALIGNED_HI_OP
960 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
961 #undef TARGET_ASM_UNALIGNED_SI_OP
962 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
963 #undef TARGET_ASM_UNALIGNED_DI_OP
964 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
966 #undef TARGET_SCHED_ADJUST_COST
967 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
968 #undef TARGET_SCHED_ISSUE_RATE
969 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
970 #undef TARGET_SCHED_VARIABLE_ISSUE
971 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
972 #undef TARGET_SCHED_INIT
973 #define TARGET_SCHED_INIT ix86_sched_init
974 #undef TARGET_SCHED_REORDER
975 #define TARGET_SCHED_REORDER ix86_sched_reorder
976 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
977 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
978 ia32_use_dfa_pipeline_interface
979 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
980 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
981 ia32_multipass_dfa_lookahead
983 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
984 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
987 #undef TARGET_HAVE_TLS
988 #define TARGET_HAVE_TLS true
990 #undef TARGET_CANNOT_FORCE_CONST_MEM
991 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
993 #undef TARGET_DELEGITIMIZE_ADDRESS
994 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
996 #undef TARGET_MS_BITFIELD_LAYOUT_P
997 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
999 #undef TARGET_ASM_OUTPUT_MI_THUNK
1000 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1001 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1002 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1004 #undef TARGET_ASM_FILE_START
1005 #define TARGET_ASM_FILE_START x86_file_start
1007 #undef TARGET_RTX_COSTS
1008 #define TARGET_RTX_COSTS ix86_rtx_costs
1009 #undef TARGET_ADDRESS_COST
1010 #define TARGET_ADDRESS_COST ix86_address_cost
1012 #undef TARGET_MACHINE_DEPENDENT_REORG
1013 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1015 struct gcc_target targetm = TARGET_INITIALIZER;
1017 /* The svr4 ABI for the i386 says that records and unions are returned
1019 #ifndef DEFAULT_PCC_STRUCT_RETURN
1020 #define DEFAULT_PCC_STRUCT_RETURN 1
1023 /* Sometimes certain combinations of command options do not make
1024 sense on a particular target machine. You can define a macro
1025 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1026 defined, is executed once just after all the command options have
1029 Don't use this macro to turn on various extra optimizations for
1030 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1033 override_options (void)
1036 /* Comes from final.c -- no real reason to change it. */
1037 #define MAX_CODE_ALIGN 16
1041 const struct processor_costs *cost; /* Processor costs */
1042 const int target_enable; /* Target flags to enable. */
1043 const int target_disable; /* Target flags to disable. */
1044 const int align_loop; /* Default alignments. */
1045 const int align_loop_max_skip;
1046 const int align_jump;
1047 const int align_jump_max_skip;
1048 const int align_func;
1050 const processor_target_table[PROCESSOR_max] =
1052 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1053 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1054 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1055 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1056 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1057 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1058 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1059 {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
1062 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1065 const char *const name; /* processor name or nickname. */
1066 const enum processor_type processor;
1067 const enum pta_flags
1072 PTA_PREFETCH_SSE = 8,
1078 const processor_alias_table[] =
1080 {"i386", PROCESSOR_I386, 0},
1081 {"i486", PROCESSOR_I486, 0},
1082 {"i586", PROCESSOR_PENTIUM, 0},
1083 {"pentium", PROCESSOR_PENTIUM, 0},
1084 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1085 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1086 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1087 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1088 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1089 {"i686", PROCESSOR_PENTIUMPRO, 0},
1090 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1091 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1092 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1093 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
1094 PTA_MMX | PTA_PREFETCH_SSE},
1095 {"k6", PROCESSOR_K6, PTA_MMX},
1096 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1097 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1098 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1100 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1101 | PTA_3DNOW | PTA_3DNOW_A},
1102 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1103 | PTA_3DNOW_A | PTA_SSE},
1104 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1105 | PTA_3DNOW_A | PTA_SSE},
1106 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1107 | PTA_3DNOW_A | PTA_SSE},
1108 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1109 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1112 int const pta_size = ARRAY_SIZE (processor_alias_table);
1114 /* By default our XFmode is the 80-bit extended format. If we have
1115 use TFmode instead, it's also the 80-bit format, but with padding. */
1116 real_format_for_mode[XFmode - QFmode] = &ieee_extended_intel_96_format;
1117 real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
1119 /* Set the default values for switches whose default depends on TARGET_64BIT
1120 in case they weren't overwritten by command line options. */
1123 if (flag_omit_frame_pointer == 2)
1124 flag_omit_frame_pointer = 1;
1125 if (flag_asynchronous_unwind_tables == 2)
1126 flag_asynchronous_unwind_tables = 1;
1127 if (flag_pcc_struct_return == 2)
1128 flag_pcc_struct_return = 0;
1132 if (flag_omit_frame_pointer == 2)
1133 flag_omit_frame_pointer = 0;
1134 if (flag_asynchronous_unwind_tables == 2)
1135 flag_asynchronous_unwind_tables = 0;
1136 if (flag_pcc_struct_return == 2)
1137 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1140 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1141 SUBTARGET_OVERRIDE_OPTIONS;
1144 if (!ix86_tune_string && ix86_arch_string)
1145 ix86_tune_string = ix86_arch_string;
1146 if (!ix86_tune_string)
1147 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1148 if (!ix86_arch_string)
1149 ix86_arch_string = TARGET_64BIT ? "k8" : "i386";
1151 if (ix86_cmodel_string != 0)
1153 if (!strcmp (ix86_cmodel_string, "small"))
1154 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1156 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1157 else if (!strcmp (ix86_cmodel_string, "32"))
1158 ix86_cmodel = CM_32;
1159 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1160 ix86_cmodel = CM_KERNEL;
1161 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1162 ix86_cmodel = CM_MEDIUM;
1163 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1164 ix86_cmodel = CM_LARGE;
1166 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1170 ix86_cmodel = CM_32;
1172 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1174 if (ix86_asm_string != 0)
1176 if (!strcmp (ix86_asm_string, "intel"))
1177 ix86_asm_dialect = ASM_INTEL;
1178 else if (!strcmp (ix86_asm_string, "att"))
1179 ix86_asm_dialect = ASM_ATT;
1181 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1183 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1184 error ("code model `%s' not supported in the %s bit mode",
1185 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1186 if (ix86_cmodel == CM_LARGE)
1187 sorry ("code model `large' not supported yet");
1188 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1189 sorry ("%i-bit mode not compiled in",
1190 (target_flags & MASK_64BIT) ? 64 : 32);
1192 for (i = 0; i < pta_size; i++)
1193 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1195 ix86_arch = processor_alias_table[i].processor;
1196 /* Default cpu tuning to the architecture. */
1197 ix86_tune = ix86_arch;
1198 if (processor_alias_table[i].flags & PTA_MMX
1199 && !(target_flags_explicit & MASK_MMX))
1200 target_flags |= MASK_MMX;
1201 if (processor_alias_table[i].flags & PTA_3DNOW
1202 && !(target_flags_explicit & MASK_3DNOW))
1203 target_flags |= MASK_3DNOW;
1204 if (processor_alias_table[i].flags & PTA_3DNOW_A
1205 && !(target_flags_explicit & MASK_3DNOW_A))
1206 target_flags |= MASK_3DNOW_A;
1207 if (processor_alias_table[i].flags & PTA_SSE
1208 && !(target_flags_explicit & MASK_SSE))
1209 target_flags |= MASK_SSE;
1210 if (processor_alias_table[i].flags & PTA_SSE2
1211 && !(target_flags_explicit & MASK_SSE2))
1212 target_flags |= MASK_SSE2;
1213 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1214 x86_prefetch_sse = true;
1215 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1216 error ("CPU you selected does not support x86-64 instruction set");
1221 error ("bad value (%s) for -march= switch", ix86_arch_string);
1223 for (i = 0; i < pta_size; i++)
1224 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1226 ix86_tune = processor_alias_table[i].processor;
1227 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1228 error ("CPU you selected does not support x86-64 instruction set");
1231 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1232 x86_prefetch_sse = true;
1234 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1237 ix86_cost = &size_cost;
1239 ix86_cost = processor_target_table[ix86_tune].cost;
1240 target_flags |= processor_target_table[ix86_tune].target_enable;
1241 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1243 /* Arrange to set up i386_stack_locals for all functions. */
1244 init_machine_status = ix86_init_machine_status;
1246 /* Validate -mregparm= value. */
1247 if (ix86_regparm_string)
1249 i = atoi (ix86_regparm_string);
1250 if (i < 0 || i > REGPARM_MAX)
1251 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1257 ix86_regparm = REGPARM_MAX;
1259 /* If the user has provided any of the -malign-* options,
1260 warn and use that value only if -falign-* is not set.
1261 Remove this code in GCC 3.2 or later. */
1262 if (ix86_align_loops_string)
1264 warning ("-malign-loops is obsolete, use -falign-loops");
1265 if (align_loops == 0)
1267 i = atoi (ix86_align_loops_string);
1268 if (i < 0 || i > MAX_CODE_ALIGN)
1269 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1271 align_loops = 1 << i;
1275 if (ix86_align_jumps_string)
1277 warning ("-malign-jumps is obsolete, use -falign-jumps");
1278 if (align_jumps == 0)
1280 i = atoi (ix86_align_jumps_string);
1281 if (i < 0 || i > MAX_CODE_ALIGN)
1282 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1284 align_jumps = 1 << i;
1288 if (ix86_align_funcs_string)
1290 warning ("-malign-functions is obsolete, use -falign-functions");
1291 if (align_functions == 0)
1293 i = atoi (ix86_align_funcs_string);
1294 if (i < 0 || i > MAX_CODE_ALIGN)
1295 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1297 align_functions = 1 << i;
1301 /* Default align_* from the processor table. */
1302 if (align_loops == 0)
1304 align_loops = processor_target_table[ix86_tune].align_loop;
1305 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1307 if (align_jumps == 0)
1309 align_jumps = processor_target_table[ix86_tune].align_jump;
1310 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1312 if (align_functions == 0)
1314 align_functions = processor_target_table[ix86_tune].align_func;
1317 /* Validate -mpreferred-stack-boundary= value, or provide default.
1318 The default of 128 bits is for Pentium III's SSE __m128, but we
1319 don't want additional code to keep the stack aligned when
1320 optimizing for code size. */
1321 ix86_preferred_stack_boundary = (optimize_size
1322 ? TARGET_64BIT ? 128 : 32
1324 if (ix86_preferred_stack_boundary_string)
1326 i = atoi (ix86_preferred_stack_boundary_string);
1327 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1328 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1329 TARGET_64BIT ? 4 : 2);
1331 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1334 /* Validate -mbranch-cost= value, or provide default. */
1335 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1336 if (ix86_branch_cost_string)
1338 i = atoi (ix86_branch_cost_string);
1340 error ("-mbranch-cost=%d is not between 0 and 5", i);
1342 ix86_branch_cost = i;
1345 if (ix86_tls_dialect_string)
1347 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1348 ix86_tls_dialect = TLS_DIALECT_GNU;
1349 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1350 ix86_tls_dialect = TLS_DIALECT_SUN;
1352 error ("bad value (%s) for -mtls-dialect= switch",
1353 ix86_tls_dialect_string);
1356 /* Keep nonleaf frame pointers. */
1357 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1358 flag_omit_frame_pointer = 1;
1360 /* If we're doing fast math, we don't care about comparison order
1361 wrt NaNs. This lets us use a shorter comparison sequence. */
1362 if (flag_unsafe_math_optimizations)
1363 target_flags &= ~MASK_IEEE_FP;
1365 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1366 since the insns won't need emulation. */
1367 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1368 target_flags &= ~MASK_NO_FANCY_MATH_387;
1370 /* Turn on SSE2 builtins for -mpni. */
1372 target_flags |= MASK_SSE2;
1374 /* Turn on SSE builtins for -msse2. */
1376 target_flags |= MASK_SSE;
1380 if (TARGET_ALIGN_DOUBLE)
1381 error ("-malign-double makes no sense in the 64bit mode");
1383 error ("-mrtd calling convention not supported in the 64bit mode");
1384 /* Enable by default the SSE and MMX builtins. */
1385 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1386 ix86_fpmath = FPMATH_SSE;
1390 ix86_fpmath = FPMATH_387;
1391 /* i386 ABI does not specify red zone. It still makes sense to use it
1392 when programmer takes care to stack from being destroyed. */
1393 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1394 target_flags |= MASK_NO_RED_ZONE;
1397 if (ix86_fpmath_string != 0)
1399 if (! strcmp (ix86_fpmath_string, "387"))
1400 ix86_fpmath = FPMATH_387;
1401 else if (! strcmp (ix86_fpmath_string, "sse"))
1405 warning ("SSE instruction set disabled, using 387 arithmetics");
1406 ix86_fpmath = FPMATH_387;
1409 ix86_fpmath = FPMATH_SSE;
1411 else if (! strcmp (ix86_fpmath_string, "387,sse")
1412 || ! strcmp (ix86_fpmath_string, "sse,387"))
1416 warning ("SSE instruction set disabled, using 387 arithmetics");
1417 ix86_fpmath = FPMATH_387;
1419 else if (!TARGET_80387)
1421 warning ("387 instruction set disabled, using SSE arithmetics");
1422 ix86_fpmath = FPMATH_SSE;
1425 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1428 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1431 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1435 target_flags |= MASK_MMX;
1436 x86_prefetch_sse = true;
1439 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1442 target_flags |= MASK_MMX;
1443 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1444 extensions it adds. */
1445 if (x86_3dnow_a & (1 << ix86_arch))
1446 target_flags |= MASK_3DNOW_A;
1448 if ((x86_accumulate_outgoing_args & TUNEMASK)
1449 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1451 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1453 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1456 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1457 p = strchr (internal_label_prefix, 'X');
1458 internal_label_prefix_len = p - internal_label_prefix;
1464 optimization_options (int level, int size ATTRIBUTE_UNUSED)
1466 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1467 make the problem with not enough registers even worse. */
1468 #ifdef INSN_SCHEDULING
1470 flag_schedule_insns = 0;
1473 /* The default values of these switches depend on the TARGET_64BIT
1474 that is not known at this moment. Mark these values with 2 and
1475 let user the to override these. In case there is no command line option
1476 specifying them, we will set the defaults in override_options. */
1478 flag_omit_frame_pointer = 2;
1479 flag_pcc_struct_return = 2;
1480 flag_asynchronous_unwind_tables = 2;
1483 /* Table of valid machine attributes. */
1484 const struct attribute_spec ix86_attribute_table[] =
1486 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1487 /* Stdcall attribute says callee is responsible for popping arguments
1488 if they are not variable. */
1489 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1490 /* Fastcall attribute says callee is responsible for popping arguments
1491 if they are not variable. */
1492 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1493 /* Cdecl attribute says the callee is a normal C declaration */
1494 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1495 /* Regparm attribute specifies how many integer arguments are to be
1496 passed in registers. */
1497 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1498 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1499 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1500 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1501 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1503 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1504 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1505 { NULL, 0, 0, false, false, false, NULL }
1508 /* Decide whether we can make a sibling call to a function. DECL is the
1509 declaration of the function being targeted by the call and EXP is the
1510 CALL_EXPR representing the call. */
1513 ix86_function_ok_for_sibcall (tree decl, tree exp)
1515 /* If we are generating position-independent code, we cannot sibcall
1516 optimize any indirect call, or a direct call to a global function,
1517 as the PLT requires %ebx be live. */
1518 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1521 /* If we are returning floats on the 80387 register stack, we cannot
1522 make a sibcall from a function that doesn't return a float to a
1523 function that does or, conversely, from a function that does return
1524 a float to a function that doesn't; the necessary stack adjustment
1525 would not be executed. */
1526 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1527 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1530 /* If this call is indirect, we'll need to be able to use a call-clobbered
1531 register for the address of the target function. Make sure that all
1532 such registers are not used for passing parameters. */
1533 if (!decl && !TARGET_64BIT)
1537 /* We're looking at the CALL_EXPR, we need the type of the function. */
1538 type = TREE_OPERAND (exp, 0); /* pointer expression */
1539 type = TREE_TYPE (type); /* pointer type */
1540 type = TREE_TYPE (type); /* function type */
1542 if (ix86_function_regparm (type, NULL) >= 3)
1544 /* ??? Need to count the actual number of registers to be used,
1545 not the possible number of registers. Fix later. */
1550 /* Otherwise okay. That also includes certain types of indirect calls. */
1554 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1555 arguments as in struct attribute_spec.handler. */
1557 ix86_handle_cdecl_attribute (tree *node, tree name,
1558 tree args ATTRIBUTE_UNUSED,
1559 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1561 if (TREE_CODE (*node) != FUNCTION_TYPE
1562 && TREE_CODE (*node) != METHOD_TYPE
1563 && TREE_CODE (*node) != FIELD_DECL
1564 && TREE_CODE (*node) != TYPE_DECL)
1566 warning ("`%s' attribute only applies to functions",
1567 IDENTIFIER_POINTER (name));
1568 *no_add_attrs = true;
1572 if (is_attribute_p ("fastcall", name))
1574 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1576 error ("fastcall and stdcall attributes are not compatible");
1578 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1580 error ("fastcall and regparm attributes are not compatible");
1583 else if (is_attribute_p ("stdcall", name))
1585 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1587 error ("fastcall and stdcall attributes are not compatible");
1594 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1595 *no_add_attrs = true;
1601 /* Handle a "regparm" attribute;
1602 arguments as in struct attribute_spec.handler. */
1604 ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1605 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1607 if (TREE_CODE (*node) != FUNCTION_TYPE
1608 && TREE_CODE (*node) != METHOD_TYPE
1609 && TREE_CODE (*node) != FIELD_DECL
1610 && TREE_CODE (*node) != TYPE_DECL)
1612 warning ("`%s' attribute only applies to functions",
1613 IDENTIFIER_POINTER (name));
1614 *no_add_attrs = true;
1620 cst = TREE_VALUE (args);
1621 if (TREE_CODE (cst) != INTEGER_CST)
1623 warning ("`%s' attribute requires an integer constant argument",
1624 IDENTIFIER_POINTER (name));
1625 *no_add_attrs = true;
1627 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1629 warning ("argument to `%s' attribute larger than %d",
1630 IDENTIFIER_POINTER (name), REGPARM_MAX);
1631 *no_add_attrs = true;
1634 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1636 error ("fastcall and regparm attributes are not compatible");
1643 /* Return 0 if the attributes for two types are incompatible, 1 if they
1644 are compatible, and 2 if they are nearly compatible (which causes a
1645 warning to be generated). */
1648 ix86_comp_type_attributes (tree type1, tree type2)
1650 /* Check for mismatch of non-default calling convention. */
1651 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1653 if (TREE_CODE (type1) != FUNCTION_TYPE)
1656 /* Check for mismatched fastcall types */
1657 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1658 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1661 /* Check for mismatched return types (cdecl vs stdcall). */
1662 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1663 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1668 /* Return the regparm value for a fuctio with the indicated TYPE and DECL.
1669 DECL may be NULL when calling function indirectly
1670 or considerling a libcall. */
1673 ix86_function_regparm (tree type, tree decl)
1676 int regparm = ix86_regparm;
1677 bool user_convention = false;
1681 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1684 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1685 user_convention = true;
1688 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1691 user_convention = true;
1694 /* Use register calling convention for local functions when possible. */
1695 if (!TARGET_64BIT && !user_convention && decl
1696 && flag_unit_at_a_time)
1698 struct cgraph_local_info *i = cgraph_local_info (decl);
1701 /* We can't use regparm(3) for nested functions as these use
1702 static chain pointer in third argument. */
1703 if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1713 /* Value is the number of bytes of arguments automatically
1714 popped when returning from a subroutine call.
1715 FUNDECL is the declaration node of the function (as a tree),
1716 FUNTYPE is the data type of the function (as a tree),
1717 or for a library call it is an identifier node for the subroutine name.
1718 SIZE is the number of bytes of arguments passed on the stack.
1720 On the 80386, the RTD insn may be used to pop them if the number
1721 of args is fixed, but if the number is variable then the caller
1722 must pop them all. RTD can't be used for library calls now
1723 because the library is compiled with the Unix compiler.
1724 Use of RTD is a selectable option, since it is incompatible with
1725 standard Unix calling sequences. If the option is not selected,
1726 the caller must always pop the args.
1728 The attribute stdcall is equivalent to RTD on a per module basis. */
1731 ix86_return_pops_args (tree fundecl, tree funtype, int size)
1733 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1735 /* Cdecl functions override -mrtd, and never pop the stack. */
1736 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1738 /* Stdcall and fastcall functions will pop the stack if not variable args. */
1739 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1740 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1744 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1745 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1746 == void_type_node)))
1750 /* Lose any fake structure return argument if it is passed on the stack. */
1751 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1754 int nregs = ix86_function_regparm (funtype, fundecl);
1757 return GET_MODE_SIZE (Pmode);
1763 /* Argument support functions. */
1765 /* Return true when register may be used to pass function parameters. */
1767 ix86_function_arg_regno_p (int regno)
1771 return (regno < REGPARM_MAX
1772 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1773 if (SSE_REGNO_P (regno) && TARGET_SSE)
1775 /* RAX is used as hidden argument to va_arg functions. */
1778 for (i = 0; i < REGPARM_MAX; i++)
1779 if (regno == x86_64_int_parameter_registers[i])
1784 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1785 for a call to a function whose data type is FNTYPE.
1786 For a library call, FNTYPE is 0. */
1789 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1790 tree fntype, /* tree ptr for function decl */
1791 rtx libname, /* SYMBOL_REF of library name or 0 */
1794 static CUMULATIVE_ARGS zero_cum;
1795 tree param, next_param;
1797 if (TARGET_DEBUG_ARG)
1799 fprintf (stderr, "\ninit_cumulative_args (");
1801 fprintf (stderr, "fntype code = %s, ret code = %s",
1802 tree_code_name[(int) TREE_CODE (fntype)],
1803 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1805 fprintf (stderr, "no fntype");
1808 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1813 /* Set up the number of registers to use for passing arguments. */
1815 cum->nregs = ix86_function_regparm (fntype, fndecl);
1817 cum->nregs = ix86_regparm;
1818 cum->sse_nregs = SSE_REGPARM_MAX;
1819 cum->maybe_vaarg = false;
1821 /* Use ecx and edx registers if function has fastcall attribute */
1822 if (fntype && !TARGET_64BIT)
1824 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1832 /* Determine if this function has variable arguments. This is
1833 indicated by the last argument being 'void_type_mode' if there
1834 are no variable arguments. If there are variable arguments, then
1835 we won't pass anything in registers */
1839 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1840 param != 0; param = next_param)
1842 next_param = TREE_CHAIN (param);
1843 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1850 cum->maybe_vaarg = true;
1854 if ((!fntype && !libname)
1855 || (fntype && !TYPE_ARG_TYPES (fntype)))
1856 cum->maybe_vaarg = 1;
1858 if (TARGET_DEBUG_ARG)
1859 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1864 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
1865 of this code is to classify each 8bytes of incoming argument by the register
1866 class and assign registers accordingly. */
1868 /* Return the union class of CLASS1 and CLASS2.
1869 See the x86-64 PS ABI for details. */
1871 static enum x86_64_reg_class
1872 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
1874 /* Rule #1: If both classes are equal, this is the resulting class. */
1875 if (class1 == class2)
1878 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1880 if (class1 == X86_64_NO_CLASS)
1882 if (class2 == X86_64_NO_CLASS)
1885 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1886 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1887 return X86_64_MEMORY_CLASS;
1889 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1890 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1891 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1892 return X86_64_INTEGERSI_CLASS;
1893 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1894 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1895 return X86_64_INTEGER_CLASS;
1897 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1898 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1899 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1900 return X86_64_MEMORY_CLASS;
1902 /* Rule #6: Otherwise class SSE is used. */
1903 return X86_64_SSE_CLASS;
1906 /* Classify the argument of type TYPE and mode MODE.
1907 CLASSES will be filled by the register class used to pass each word
1908 of the operand. The number of words is returned. In case the parameter
1909 should be passed in memory, 0 is returned. As a special case for zero
1910 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1912 BIT_OFFSET is used internally for handling records and specifies offset
1913 of the offset in bits modulo 256 to avoid overflow cases.
1915 See the x86-64 PS ABI for details.
1919 classify_argument (enum machine_mode mode, tree type,
1920 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
1923 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1924 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1926 /* Variable sized entities are always passed/returned in memory. */
1930 if (mode != VOIDmode
1931 && MUST_PASS_IN_STACK (mode, type))
1934 if (type && AGGREGATE_TYPE_P (type))
1938 enum x86_64_reg_class subclasses[MAX_CLASSES];
1940 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1944 for (i = 0; i < words; i++)
1945 classes[i] = X86_64_NO_CLASS;
1947 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1948 signalize memory class, so handle it as special case. */
1951 classes[0] = X86_64_NO_CLASS;
1955 /* Classify each field of record and merge classes. */
1956 if (TREE_CODE (type) == RECORD_TYPE)
1958 /* For classes first merge in the field of the subclasses. */
1959 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1961 tree bases = TYPE_BINFO_BASETYPES (type);
1962 int n_bases = TREE_VEC_LENGTH (bases);
1965 for (i = 0; i < n_bases; ++i)
1967 tree binfo = TREE_VEC_ELT (bases, i);
1969 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1970 tree type = BINFO_TYPE (binfo);
1972 num = classify_argument (TYPE_MODE (type),
1974 (offset + bit_offset) % 256);
1977 for (i = 0; i < num; i++)
1979 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1981 merge_classes (subclasses[i], classes[i + pos]);
1985 /* And now merge the fields of structure. */
1986 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1988 if (TREE_CODE (field) == FIELD_DECL)
1992 /* Bitfields are always classified as integer. Handle them
1993 early, since later code would consider them to be
1994 misaligned integers. */
1995 if (DECL_BIT_FIELD (field))
1997 for (i = int_bit_position (field) / 8 / 8;
1998 i < (int_bit_position (field)
1999 + tree_low_cst (DECL_SIZE (field), 0)
2002 merge_classes (X86_64_INTEGER_CLASS,
2007 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2008 TREE_TYPE (field), subclasses,
2009 (int_bit_position (field)
2010 + bit_offset) % 256);
2013 for (i = 0; i < num; i++)
2016 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2018 merge_classes (subclasses[i], classes[i + pos]);
2024 /* Arrays are handled as small records. */
2025 else if (TREE_CODE (type) == ARRAY_TYPE)
2028 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2029 TREE_TYPE (type), subclasses, bit_offset);
2033 /* The partial classes are now full classes. */
2034 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2035 subclasses[0] = X86_64_SSE_CLASS;
2036 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2037 subclasses[0] = X86_64_INTEGER_CLASS;
2039 for (i = 0; i < words; i++)
2040 classes[i] = subclasses[i % num];
2042 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2043 else if (TREE_CODE (type) == UNION_TYPE
2044 || TREE_CODE (type) == QUAL_UNION_TYPE)
2046 /* For classes first merge in the field of the subclasses. */
2047 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2049 tree bases = TYPE_BINFO_BASETYPES (type);
2050 int n_bases = TREE_VEC_LENGTH (bases);
2053 for (i = 0; i < n_bases; ++i)
2055 tree binfo = TREE_VEC_ELT (bases, i);
2057 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2058 tree type = BINFO_TYPE (binfo);
2060 num = classify_argument (TYPE_MODE (type),
2062 (offset + (bit_offset % 64)) % 256);
2065 for (i = 0; i < num; i++)
2067 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2069 merge_classes (subclasses[i], classes[i + pos]);
2073 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2075 if (TREE_CODE (field) == FIELD_DECL)
2078 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2079 TREE_TYPE (field), subclasses,
2083 for (i = 0; i < num; i++)
2084 classes[i] = merge_classes (subclasses[i], classes[i]);
2091 /* Final merger cleanup. */
2092 for (i = 0; i < words; i++)
2094 /* If one class is MEMORY, everything should be passed in
2096 if (classes[i] == X86_64_MEMORY_CLASS)
2099 /* The X86_64_SSEUP_CLASS should be always preceded by
2100 X86_64_SSE_CLASS. */
2101 if (classes[i] == X86_64_SSEUP_CLASS
2102 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2103 classes[i] = X86_64_SSE_CLASS;
2105 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2106 if (classes[i] == X86_64_X87UP_CLASS
2107 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2108 classes[i] = X86_64_SSE_CLASS;
2113 /* Compute alignment needed. We align all types to natural boundaries with
2114 exception of XFmode that is aligned to 64bits. */
2115 if (mode != VOIDmode && mode != BLKmode)
2117 int mode_alignment = GET_MODE_BITSIZE (mode);
2120 mode_alignment = 128;
2121 else if (mode == XCmode)
2122 mode_alignment = 256;
2123 /* Misaligned fields are always returned in memory. */
2124 if (bit_offset % mode_alignment)
2128 /* Classification of atomic types. */
2138 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2139 classes[0] = X86_64_INTEGERSI_CLASS;
2141 classes[0] = X86_64_INTEGER_CLASS;
2145 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2148 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2149 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2152 if (!(bit_offset % 64))
2153 classes[0] = X86_64_SSESF_CLASS;
2155 classes[0] = X86_64_SSE_CLASS;
2158 classes[0] = X86_64_SSEDF_CLASS;
2161 classes[0] = X86_64_X87_CLASS;
2162 classes[1] = X86_64_X87UP_CLASS;
2165 classes[0] = X86_64_X87_CLASS;
2166 classes[1] = X86_64_X87UP_CLASS;
2167 classes[2] = X86_64_X87_CLASS;
2168 classes[3] = X86_64_X87UP_CLASS;
2171 classes[0] = X86_64_SSEDF_CLASS;
2172 classes[1] = X86_64_SSEDF_CLASS;
2175 classes[0] = X86_64_SSE_CLASS;
2183 classes[0] = X86_64_SSE_CLASS;
2184 classes[1] = X86_64_SSEUP_CLASS;
2199 /* Examine the argument and return set number of register required in each
2200 class. Return 0 iff parameter should be passed in memory. */
2202 examine_argument (enum machine_mode mode, tree type, int in_return,
2203 int *int_nregs, int *sse_nregs)
2205 enum x86_64_reg_class class[MAX_CLASSES];
2206 int n = classify_argument (mode, type, class, 0);
2212 for (n--; n >= 0; n--)
2215 case X86_64_INTEGER_CLASS:
2216 case X86_64_INTEGERSI_CLASS:
2219 case X86_64_SSE_CLASS:
2220 case X86_64_SSESF_CLASS:
2221 case X86_64_SSEDF_CLASS:
2224 case X86_64_NO_CLASS:
2225 case X86_64_SSEUP_CLASS:
2227 case X86_64_X87_CLASS:
2228 case X86_64_X87UP_CLASS:
2232 case X86_64_MEMORY_CLASS:
2237 /* Construct container for the argument used by GCC interface. See
2238 FUNCTION_ARG for the detailed description. */
2240 construct_container (enum machine_mode mode, tree type, int in_return,
2241 int nintregs, int nsseregs, const int * intreg,
2244 enum machine_mode tmpmode;
2246 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2247 enum x86_64_reg_class class[MAX_CLASSES];
2251 int needed_sseregs, needed_intregs;
2252 rtx exp[MAX_CLASSES];
2255 n = classify_argument (mode, type, class, 0);
2256 if (TARGET_DEBUG_ARG)
2259 fprintf (stderr, "Memory class\n");
2262 fprintf (stderr, "Classes:");
2263 for (i = 0; i < n; i++)
2265 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2267 fprintf (stderr, "\n");
2272 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2274 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2277 /* First construct simple cases. Avoid SCmode, since we want to use
2278 single register to pass this type. */
2279 if (n == 1 && mode != SCmode)
2282 case X86_64_INTEGER_CLASS:
2283 case X86_64_INTEGERSI_CLASS:
2284 return gen_rtx_REG (mode, intreg[0]);
2285 case X86_64_SSE_CLASS:
2286 case X86_64_SSESF_CLASS:
2287 case X86_64_SSEDF_CLASS:
2288 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2289 case X86_64_X87_CLASS:
2290 return gen_rtx_REG (mode, FIRST_STACK_REG);
2291 case X86_64_NO_CLASS:
2292 /* Zero sized array, struct or class. */
2297 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
2298 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2300 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2301 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
2302 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2303 && class[1] == X86_64_INTEGER_CLASS
2304 && (mode == CDImode || mode == TImode)
2305 && intreg[0] + 1 == intreg[1])
2306 return gen_rtx_REG (mode, intreg[0]);
2308 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2309 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2310 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
2312 /* Otherwise figure out the entries of the PARALLEL. */
2313 for (i = 0; i < n; i++)
2317 case X86_64_NO_CLASS:
2319 case X86_64_INTEGER_CLASS:
2320 case X86_64_INTEGERSI_CLASS:
2321 /* Merge TImodes on aligned occasions here too. */
2322 if (i * 8 + 8 > bytes)
2323 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2324 else if (class[i] == X86_64_INTEGERSI_CLASS)
2328 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2329 if (tmpmode == BLKmode)
2331 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2332 gen_rtx_REG (tmpmode, *intreg),
2336 case X86_64_SSESF_CLASS:
2337 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2338 gen_rtx_REG (SFmode,
2339 SSE_REGNO (sse_regno)),
2343 case X86_64_SSEDF_CLASS:
2344 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2345 gen_rtx_REG (DFmode,
2346 SSE_REGNO (sse_regno)),
2350 case X86_64_SSE_CLASS:
2351 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2355 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2356 gen_rtx_REG (tmpmode,
2357 SSE_REGNO (sse_regno)),
2359 if (tmpmode == TImode)
2367 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2368 for (i = 0; i < nexps; i++)
2369 XVECEXP (ret, 0, i) = exp [i];
2373 /* Update the data in CUM to advance over an argument
2374 of mode MODE and data type TYPE.
2375 (TYPE is null for libcalls where that information may not be available.) */
2378 function_arg_advance (CUMULATIVE_ARGS *cum, /* current arg information */
2379 enum machine_mode mode, /* current arg mode */
2380 tree type, /* type of the argument or 0 if lib support */
2381 int named) /* whether or not the argument was named */
2384 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2385 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2387 if (TARGET_DEBUG_ARG)
2389 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2390 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2393 int int_nregs, sse_nregs;
2394 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2395 cum->words += words;
2396 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2398 cum->nregs -= int_nregs;
2399 cum->sse_nregs -= sse_nregs;
2400 cum->regno += int_nregs;
2401 cum->sse_regno += sse_nregs;
2404 cum->words += words;
2408 if (TARGET_SSE && mode == TImode)
2410 cum->sse_words += words;
2411 cum->sse_nregs -= 1;
2412 cum->sse_regno += 1;
2413 if (cum->sse_nregs <= 0)
2421 cum->words += words;
2422 cum->nregs -= words;
2423 cum->regno += words;
2425 if (cum->nregs <= 0)
2435 /* Define where to put the arguments to a function.
2436 Value is zero to push the argument on the stack,
2437 or a hard register in which to store the argument.
2439 MODE is the argument's machine mode.
2440 TYPE is the data type of the argument (as a tree).
2441 This is null for libcalls where that information may
2443 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2444 the preceding args and about the function being called.
2445 NAMED is nonzero if this argument is a named parameter
2446 (otherwise it is an extra parameter matching an ellipsis). */
2449 function_arg (CUMULATIVE_ARGS *cum, /* current arg information */
2450 enum machine_mode mode, /* current arg mode */
2451 tree type, /* type of the argument or 0 if lib support */
2452 int named) /* != 0 for normal args, == 0 for ... args */
2456 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2457 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2459 /* Handle a hidden AL argument containing number of registers for varargs
2460 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2462 if (mode == VOIDmode)
2465 return GEN_INT (cum->maybe_vaarg
2466 ? (cum->sse_nregs < 0
2474 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2475 &x86_64_int_parameter_registers [cum->regno],
2480 /* For now, pass fp/complex values on the stack. */
2492 if (words <= cum->nregs)
2494 int regno = cum->regno;
2496 /* Fastcall allocates the first two DWORD (SImode) or
2497 smaller arguments to ECX and EDX. */
2500 if (mode == BLKmode || mode == DImode)
2503 /* ECX not EAX is the first allocated register. */
2507 ret = gen_rtx_REG (mode, regno);
2512 ret = gen_rtx_REG (mode, cum->sse_regno);
2516 if (TARGET_DEBUG_ARG)
2519 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2520 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2523 print_simple_rtl (stderr, ret);
2525 fprintf (stderr, ", stack");
2527 fprintf (stderr, " )\n");
2533 /* A C expression that indicates when an argument must be passed by
2534 reference. If nonzero for an argument, a copy of that argument is
2535 made in memory and a pointer to the argument is passed instead of
2536 the argument itself. The pointer is passed in whatever way is
2537 appropriate for passing a pointer to that type. */
2540 function_arg_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2541 enum machine_mode mode ATTRIBUTE_UNUSED,
2542 tree type, int named ATTRIBUTE_UNUSED)
2547 if (type && int_size_in_bytes (type) == -1)
2549 if (TARGET_DEBUG_ARG)
2550 fprintf (stderr, "function_arg_pass_by_reference\n");
2557 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
2560 contains_128bit_aligned_vector_p (tree type)
2562 enum machine_mode mode = TYPE_MODE (type);
2563 if (SSE_REG_MODE_P (mode)
2564 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2566 if (TYPE_ALIGN (type) < 128)
2569 if (AGGREGATE_TYPE_P (type))
2571 /* Walk the aggregates recursively. */
2572 if (TREE_CODE (type) == RECORD_TYPE
2573 || TREE_CODE (type) == UNION_TYPE
2574 || TREE_CODE (type) == QUAL_UNION_TYPE)
2578 if (TYPE_BINFO (type) != NULL
2579 && TYPE_BINFO_BASETYPES (type) != NULL)
2581 tree bases = TYPE_BINFO_BASETYPES (type);
2582 int n_bases = TREE_VEC_LENGTH (bases);
2585 for (i = 0; i < n_bases; ++i)
2587 tree binfo = TREE_VEC_ELT (bases, i);
2588 tree type = BINFO_TYPE (binfo);
2590 if (contains_128bit_aligned_vector_p (type))
2594 /* And now merge the fields of structure. */
2595 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2597 if (TREE_CODE (field) == FIELD_DECL
2598 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2602 /* Just for use if some languages passes arrays by value. */
2603 else if (TREE_CODE (type) == ARRAY_TYPE)
2605 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2614 /* Gives the alignment boundary, in bits, of an argument with the
2615 specified mode and type. */
2618 ix86_function_arg_boundary (enum machine_mode mode, tree type)
2622 align = TYPE_ALIGN (type);
2624 align = GET_MODE_ALIGNMENT (mode);
2625 if (align < PARM_BOUNDARY)
2626 align = PARM_BOUNDARY;
2629 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2630 make an exception for SSE modes since these require 128bit
2633 The handling here differs from field_alignment. ICC aligns MMX
2634 arguments to 4 byte boundaries, while structure fields are aligned
2635 to 8 byte boundaries. */
2638 if (!SSE_REG_MODE_P (mode))
2639 align = PARM_BOUNDARY;
2643 if (!contains_128bit_aligned_vector_p (type))
2644 align = PARM_BOUNDARY;
2652 /* Return true if N is a possible register number of function value. */
2654 ix86_function_value_regno_p (int regno)
2658 return ((regno) == 0
2659 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2660 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2662 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2663 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2664 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2667 /* Define how to find the value returned by a function.
2668 VALTYPE is the data type of the value (as a tree).
2669 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2670 otherwise, FUNC is 0. */
2672 ix86_function_value (tree valtype)
2676 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2677 REGPARM_MAX, SSE_REGPARM_MAX,
2678 x86_64_int_return_registers, 0);
2679 /* For zero sized structures, construct_container return NULL, but we need
2680 to keep rest of compiler happy by returning meaningful value. */
2682 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2686 return gen_rtx_REG (TYPE_MODE (valtype),
2687 ix86_value_regno (TYPE_MODE (valtype)));
2690 /* Return false iff type is returned in memory. */
2692 ix86_return_in_memory (tree type)
2694 int needed_intregs, needed_sseregs, size;
2695 enum machine_mode mode = TYPE_MODE (type);
2698 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
2700 if (mode == BLKmode)
2703 size = int_size_in_bytes (type);
2705 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
2708 if (VECTOR_MODE_P (mode) || mode == TImode)
2710 /* User-created vectors small enough to fit in EAX. */
2714 /* MMX/3dNow values are returned on the stack, since we've
2715 got to EMMS/FEMMS before returning. */
2719 /* SSE values are returned in XMM0. */
2720 /* ??? Except when it doesn't exist? We have a choice of
2721 either (1) being abi incompatible with a -march switch,
2722 or (2) generating an error here. Given no good solution,
2723 I think the safest thing is one warning. The user won't
2724 be able to use -Werror, but... */
2735 warning ("SSE vector return without SSE enabled "
2749 /* Define how to find the value returned by a library function
2750 assuming the value has mode MODE. */
2752 ix86_libcall_value (enum machine_mode mode)
2762 return gen_rtx_REG (mode, FIRST_SSE_REG);
2765 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2767 return gen_rtx_REG (mode, 0);
2771 return gen_rtx_REG (mode, ix86_value_regno (mode));
2774 /* Given a mode, return the register to use for a return value. */
2777 ix86_value_regno (enum machine_mode mode)
2779 /* Floating point return values in %st(0). */
2780 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2781 return FIRST_FLOAT_REG;
2782 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
2783 we prevent this case when sse is not available. */
2784 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
2785 return FIRST_SSE_REG;
2786 /* Everything else in %eax. */
2790 /* Create the va_list data type. */
2793 ix86_build_va_list (void)
2795 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2797 /* For i386 we use plain pointer to argument area. */
2799 return build_pointer_type (char_type_node);
2801 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2802 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2804 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2805 unsigned_type_node);
2806 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2807 unsigned_type_node);
2808 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2810 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2813 DECL_FIELD_CONTEXT (f_gpr) = record;
2814 DECL_FIELD_CONTEXT (f_fpr) = record;
2815 DECL_FIELD_CONTEXT (f_ovf) = record;
2816 DECL_FIELD_CONTEXT (f_sav) = record;
2818 TREE_CHAIN (record) = type_decl;
2819 TYPE_NAME (record) = type_decl;
2820 TYPE_FIELDS (record) = f_gpr;
2821 TREE_CHAIN (f_gpr) = f_fpr;
2822 TREE_CHAIN (f_fpr) = f_ovf;
2823 TREE_CHAIN (f_ovf) = f_sav;
2825 layout_type (record);
2827 /* The correct type is an array type of one element. */
2828 return build_array_type (record, build_index_type (size_zero_node));
2831 /* Perform any needed actions needed for a function that is receiving a
2832 variable number of arguments.
2836 MODE and TYPE are the mode and type of the current parameter.
2838 PRETEND_SIZE is a variable that should be set to the amount of stack
2839 that must be pushed by the prolog to pretend that our caller pushed
2842 Normally, this macro will push all remaining incoming registers on the
2843 stack and set PRETEND_SIZE to the length of the registers pushed. */
2846 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
2847 tree type, int *pretend_size ATTRIBUTE_UNUSED,
2850 CUMULATIVE_ARGS next_cum;
2851 rtx save_area = NULL_RTX, mem;
2864 /* Indicate to allocate space on the stack for varargs save area. */
2865 ix86_save_varrargs_registers = 1;
2867 cfun->stack_alignment_needed = 128;
2869 fntype = TREE_TYPE (current_function_decl);
2870 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2871 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2872 != void_type_node));
2874 /* For varargs, we do not want to skip the dummy va_dcl argument.
2875 For stdargs, we do want to skip the last named argument. */
2878 function_arg_advance (&next_cum, mode, type, 1);
2881 save_area = frame_pointer_rtx;
2883 set = get_varargs_alias_set ();
2885 for (i = next_cum.regno; i < ix86_regparm; i++)
2887 mem = gen_rtx_MEM (Pmode,
2888 plus_constant (save_area, i * UNITS_PER_WORD));
2889 set_mem_alias_set (mem, set);
2890 emit_move_insn (mem, gen_rtx_REG (Pmode,
2891 x86_64_int_parameter_registers[i]));
2894 if (next_cum.sse_nregs)
2896 /* Now emit code to save SSE registers. The AX parameter contains number
2897 of SSE parameter registers used to call this function. We use
2898 sse_prologue_save insn template that produces computed jump across
2899 SSE saves. We need some preparation work to get this working. */
2901 label = gen_label_rtx ();
2902 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2904 /* Compute address to jump to :
2905 label - 5*eax + nnamed_sse_arguments*5 */
2906 tmp_reg = gen_reg_rtx (Pmode);
2907 nsse_reg = gen_reg_rtx (Pmode);
2908 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2909 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2910 gen_rtx_MULT (Pmode, nsse_reg,
2912 if (next_cum.sse_regno)
2915 gen_rtx_CONST (DImode,
2916 gen_rtx_PLUS (DImode,
2918 GEN_INT (next_cum.sse_regno * 4))));
2920 emit_move_insn (nsse_reg, label_ref);
2921 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2923 /* Compute address of memory block we save into. We always use pointer
2924 pointing 127 bytes after first byte to store - this is needed to keep
2925 instruction size limited by 4 bytes. */
2926 tmp_reg = gen_reg_rtx (Pmode);
2927 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2928 plus_constant (save_area,
2929 8 * REGPARM_MAX + 127)));
2930 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2931 set_mem_alias_set (mem, set);
2932 set_mem_align (mem, BITS_PER_WORD);
2934 /* And finally do the dirty job! */
2935 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2936 GEN_INT (next_cum.sse_regno), label));
2941 /* Implement va_start. */
2944 ix86_va_start (tree valist, rtx nextarg)
2946 HOST_WIDE_INT words, n_gpr, n_fpr;
2947 tree f_gpr, f_fpr, f_ovf, f_sav;
2948 tree gpr, fpr, ovf, sav, t;
2950 /* Only 64bit target needs something special. */
2953 std_expand_builtin_va_start (valist, nextarg);
2957 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2958 f_fpr = TREE_CHAIN (f_gpr);
2959 f_ovf = TREE_CHAIN (f_fpr);
2960 f_sav = TREE_CHAIN (f_ovf);
2962 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2963 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2964 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2965 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2966 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2968 /* Count number of gp and fp argument registers used. */
2969 words = current_function_args_info.words;
2970 n_gpr = current_function_args_info.regno;
2971 n_fpr = current_function_args_info.sse_regno;
2973 if (TARGET_DEBUG_ARG)
2974 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2975 (int) words, (int) n_gpr, (int) n_fpr);
2977 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2978 build_int_2 (n_gpr * 8, 0));
2979 TREE_SIDE_EFFECTS (t) = 1;
2980 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2982 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2983 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2984 TREE_SIDE_EFFECTS (t) = 1;
2985 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2987 /* Find the overflow area. */
2988 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2990 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2991 build_int_2 (words * UNITS_PER_WORD, 0));
2992 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2993 TREE_SIDE_EFFECTS (t) = 1;
2994 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2996 /* Find the register save area.
2997 Prologue of the function save it right above stack frame. */
2998 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2999 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3000 TREE_SIDE_EFFECTS (t) = 1;
3001 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3004 /* Implement va_arg. */
3006 ix86_va_arg (tree valist, tree type)
3008 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3009 tree f_gpr, f_fpr, f_ovf, f_sav;
3010 tree gpr, fpr, ovf, sav, t;
3012 rtx lab_false, lab_over = NULL_RTX;
3017 /* Only 64bit target needs something special. */
3020 return std_expand_builtin_va_arg (valist, type);
3023 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3024 f_fpr = TREE_CHAIN (f_gpr);
3025 f_ovf = TREE_CHAIN (f_fpr);
3026 f_sav = TREE_CHAIN (f_ovf);
3028 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3029 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3030 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3031 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3032 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3034 size = int_size_in_bytes (type);
3037 /* Passed by reference. */
3039 type = build_pointer_type (type);
3040 size = int_size_in_bytes (type);
3042 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3044 container = construct_container (TYPE_MODE (type), type, 0,
3045 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3047 * Pull the value out of the saved registers ...
3050 addr_rtx = gen_reg_rtx (Pmode);
3054 rtx int_addr_rtx, sse_addr_rtx;
3055 int needed_intregs, needed_sseregs;
3058 lab_over = gen_label_rtx ();
3059 lab_false = gen_label_rtx ();
3061 examine_argument (TYPE_MODE (type), type, 0,
3062 &needed_intregs, &needed_sseregs);
3065 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
3066 || TYPE_ALIGN (type) > 128);
3068 /* In case we are passing structure, verify that it is consecutive block
3069 on the register save area. If not we need to do moves. */
3070 if (!need_temp && !REG_P (container))
3072 /* Verify that all registers are strictly consecutive */
3073 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3077 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3079 rtx slot = XVECEXP (container, 0, i);
3080 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3081 || INTVAL (XEXP (slot, 1)) != i * 16)
3089 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3091 rtx slot = XVECEXP (container, 0, i);
3092 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3093 || INTVAL (XEXP (slot, 1)) != i * 8)
3100 int_addr_rtx = addr_rtx;
3101 sse_addr_rtx = addr_rtx;
3105 int_addr_rtx = gen_reg_rtx (Pmode);
3106 sse_addr_rtx = gen_reg_rtx (Pmode);
3108 /* First ensure that we fit completely in registers. */
3111 emit_cmp_and_jump_insns (expand_expr
3112 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
3113 GEN_INT ((REGPARM_MAX - needed_intregs +
3114 1) * 8), GE, const1_rtx, SImode,
3119 emit_cmp_and_jump_insns (expand_expr
3120 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
3121 GEN_INT ((SSE_REGPARM_MAX -
3122 needed_sseregs + 1) * 16 +
3123 REGPARM_MAX * 8), GE, const1_rtx,
3124 SImode, 1, lab_false);
3127 /* Compute index to start of area used for integer regs. */
3130 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
3131 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
3132 if (r != int_addr_rtx)
3133 emit_move_insn (int_addr_rtx, r);
3137 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
3138 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
3139 if (r != sse_addr_rtx)
3140 emit_move_insn (sse_addr_rtx, r);
3148 /* Never use the memory itself, as it has the alias set. */
3149 x = XEXP (assign_temp (type, 0, 1, 0), 0);
3150 mem = gen_rtx_MEM (BLKmode, x);
3151 force_operand (x, addr_rtx);
3152 set_mem_alias_set (mem, get_varargs_alias_set ());
3153 set_mem_align (mem, BITS_PER_UNIT);
3155 for (i = 0; i < XVECLEN (container, 0); i++)
3157 rtx slot = XVECEXP (container, 0, i);
3158 rtx reg = XEXP (slot, 0);
3159 enum machine_mode mode = GET_MODE (reg);
3165 if (SSE_REGNO_P (REGNO (reg)))
3167 src_addr = sse_addr_rtx;
3168 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3172 src_addr = int_addr_rtx;
3173 src_offset = REGNO (reg) * 8;
3175 src_mem = gen_rtx_MEM (mode, src_addr);
3176 set_mem_alias_set (src_mem, get_varargs_alias_set ());
3177 src_mem = adjust_address (src_mem, mode, src_offset);
3178 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
3179 emit_move_insn (dest_mem, src_mem);
3186 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3187 build_int_2 (needed_intregs * 8, 0));
3188 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3189 TREE_SIDE_EFFECTS (t) = 1;
3190 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3195 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3196 build_int_2 (needed_sseregs * 16, 0));
3197 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3198 TREE_SIDE_EFFECTS (t) = 1;
3199 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3202 emit_jump_insn (gen_jump (lab_over));
3204 emit_label (lab_false);
3207 /* ... otherwise out of the overflow area. */
3209 /* Care for on-stack alignment if needed. */
3210 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3214 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3215 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3216 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3220 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3222 emit_move_insn (addr_rtx, r);
3225 build (PLUS_EXPR, TREE_TYPE (t), t,
3226 build_int_2 (rsize * UNITS_PER_WORD, 0));
3227 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3228 TREE_SIDE_EFFECTS (t) = 1;
3229 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3232 emit_label (lab_over);
3236 r = gen_rtx_MEM (Pmode, addr_rtx);
3237 set_mem_alias_set (r, get_varargs_alias_set ());
3238 emit_move_insn (addr_rtx, r);
3244 /* Return nonzero if OP is either a i387 or SSE fp register. */
3246 any_fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3248 return ANY_FP_REG_P (op);
3251 /* Return nonzero if OP is an i387 fp register. */
3253 fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3255 return FP_REG_P (op);
3258 /* Return nonzero if OP is a non-fp register_operand. */
3260 register_and_not_any_fp_reg_operand (rtx op, enum machine_mode mode)
3262 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3265 /* Return nonzero if OP is a register operand other than an
3266 i387 fp register. */
3268 register_and_not_fp_reg_operand (rtx op, enum machine_mode mode)
3270 return register_operand (op, mode) && !FP_REG_P (op);
3273 /* Return nonzero if OP is general operand representable on x86_64. */
3276 x86_64_general_operand (rtx op, enum machine_mode mode)
3279 return general_operand (op, mode);
3280 if (nonimmediate_operand (op, mode))
3282 return x86_64_sign_extended_value (op);
3285 /* Return nonzero if OP is general operand representable on x86_64
3286 as either sign extended or zero extended constant. */
3289 x86_64_szext_general_operand (rtx op, enum machine_mode mode)
3292 return general_operand (op, mode);
3293 if (nonimmediate_operand (op, mode))
3295 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3298 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3301 x86_64_nonmemory_operand (rtx op, enum machine_mode mode)
3304 return nonmemory_operand (op, mode);
3305 if (register_operand (op, mode))
3307 return x86_64_sign_extended_value (op);
3310 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
3313 x86_64_movabs_operand (rtx op, enum machine_mode mode)
3315 if (!TARGET_64BIT || !flag_pic)
3316 return nonmemory_operand (op, mode);
3317 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3319 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3324 /* Return nonzero if OPNUM's MEM should be matched
3325 in movabs* patterns. */
3328 ix86_check_movabs (rtx insn, int opnum)
3332 set = PATTERN (insn);
3333 if (GET_CODE (set) == PARALLEL)
3334 set = XVECEXP (set, 0, 0);
3335 if (GET_CODE (set) != SET)
3337 mem = XEXP (set, opnum);
3338 while (GET_CODE (mem) == SUBREG)
3339 mem = SUBREG_REG (mem);
3340 if (GET_CODE (mem) != MEM)
3342 return (volatile_ok || !MEM_VOLATILE_P (mem));
3345 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3348 x86_64_szext_nonmemory_operand (rtx op, enum machine_mode mode)
3351 return nonmemory_operand (op, mode);
3352 if (register_operand (op, mode))
3354 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3357 /* Return nonzero if OP is immediate operand representable on x86_64. */
3360 x86_64_immediate_operand (rtx op, enum machine_mode mode)
3363 return immediate_operand (op, mode);
3364 return x86_64_sign_extended_value (op);
3367 /* Return nonzero if OP is immediate operand representable on x86_64. */
3370 x86_64_zext_immediate_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3372 return x86_64_zero_extended_value (op);
3375 /* Return nonzero if OP is (const_int 1), else return zero. */
3378 const_int_1_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3380 return op == const1_rtx;
3383 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3384 for shift & compare patterns, as shifting by 0 does not change flags),
3385 else return zero. */
3388 const_int_1_31_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3390 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3393 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3394 reference and a constant. */
3397 symbolic_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3399 switch (GET_CODE (op))
3407 if (GET_CODE (op) == SYMBOL_REF
3408 || GET_CODE (op) == LABEL_REF
3409 || (GET_CODE (op) == UNSPEC
3410 && (XINT (op, 1) == UNSPEC_GOT
3411 || XINT (op, 1) == UNSPEC_GOTOFF
3412 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3414 if (GET_CODE (op) != PLUS
3415 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3419 if (GET_CODE (op) == SYMBOL_REF
3420 || GET_CODE (op) == LABEL_REF)
3422 /* Only @GOTOFF gets offsets. */
3423 if (GET_CODE (op) != UNSPEC
3424 || XINT (op, 1) != UNSPEC_GOTOFF)
3427 op = XVECEXP (op, 0, 0);
3428 if (GET_CODE (op) == SYMBOL_REF
3429 || GET_CODE (op) == LABEL_REF)
3438 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3441 pic_symbolic_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3443 if (GET_CODE (op) != CONST)
3448 if (GET_CODE (op) == UNSPEC
3449 && XINT (op, 1) == UNSPEC_GOTPCREL)
3451 if (GET_CODE (op) == PLUS
3452 && GET_CODE (XEXP (op, 0)) == UNSPEC
3453 && XINT (XEXP (op, 0), 1) == UNSPEC_GOTPCREL)
3458 if (GET_CODE (op) == UNSPEC)
3460 if (GET_CODE (op) != PLUS
3461 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3464 if (GET_CODE (op) == UNSPEC)
3470 /* Return true if OP is a symbolic operand that resolves locally. */
3473 local_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3475 if (GET_CODE (op) == CONST
3476 && GET_CODE (XEXP (op, 0)) == PLUS
3477 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3478 op = XEXP (XEXP (op, 0), 0);
3480 if (GET_CODE (op) == LABEL_REF)
3483 if (GET_CODE (op) != SYMBOL_REF)
3486 if (SYMBOL_REF_LOCAL_P (op))
3489 /* There is, however, a not insubstantial body of code in the rest of
3490 the compiler that assumes it can just stick the results of
3491 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3492 /* ??? This is a hack. Should update the body of the compiler to
3493 always create a DECL an invoke targetm.encode_section_info. */
3494 if (strncmp (XSTR (op, 0), internal_label_prefix,
3495 internal_label_prefix_len) == 0)
3501 /* Test for various thread-local symbols. */
3504 tls_symbolic_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3506 if (GET_CODE (op) != SYMBOL_REF)
3508 return SYMBOL_REF_TLS_MODEL (op);
3512 tls_symbolic_operand_1 (rtx op, enum tls_model kind)
3514 if (GET_CODE (op) != SYMBOL_REF)
3516 return SYMBOL_REF_TLS_MODEL (op) == kind;
3520 global_dynamic_symbolic_operand (register rtx op,
3521 enum machine_mode mode ATTRIBUTE_UNUSED)
3523 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3527 local_dynamic_symbolic_operand (register rtx op,
3528 enum machine_mode mode ATTRIBUTE_UNUSED)
3530 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3534 initial_exec_symbolic_operand (register rtx op,
3535 enum machine_mode mode ATTRIBUTE_UNUSED)
3537 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3541 local_exec_symbolic_operand (register rtx op,
3542 enum machine_mode mode ATTRIBUTE_UNUSED)
3544 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3547 /* Test for a valid operand for a call instruction. Don't allow the
3548 arg pointer register or virtual regs since they may decay into
3549 reg + const, which the patterns can't handle. */
3552 call_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3554 /* Disallow indirect through a virtual register. This leads to
3555 compiler aborts when trying to eliminate them. */
3556 if (GET_CODE (op) == REG
3557 && (op == arg_pointer_rtx
3558 || op == frame_pointer_rtx
3559 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3560 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3563 /* Disallow `call 1234'. Due to varying assembler lameness this
3564 gets either rejected or translated to `call .+1234'. */
3565 if (GET_CODE (op) == CONST_INT)
3568 /* Explicitly allow SYMBOL_REF even if pic. */
3569 if (GET_CODE (op) == SYMBOL_REF)
3572 /* Otherwise we can allow any general_operand in the address. */
3573 return general_operand (op, Pmode);
3576 /* Test for a valid operand for a call instruction. Don't allow the
3577 arg pointer register or virtual regs since they may decay into
3578 reg + const, which the patterns can't handle. */
3581 sibcall_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3583 /* Disallow indirect through a virtual register. This leads to
3584 compiler aborts when trying to eliminate them. */
3585 if (GET_CODE (op) == REG
3586 && (op == arg_pointer_rtx
3587 || op == frame_pointer_rtx
3588 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3589 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3592 /* Explicitly allow SYMBOL_REF even if pic. */
3593 if (GET_CODE (op) == SYMBOL_REF)
3596 /* Otherwise we can only allow register operands. */
3597 return register_operand (op, Pmode);
3601 constant_call_address_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3603 if (GET_CODE (op) == CONST
3604 && GET_CODE (XEXP (op, 0)) == PLUS
3605 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3606 op = XEXP (XEXP (op, 0), 0);
3607 return GET_CODE (op) == SYMBOL_REF;
3610 /* Match exactly zero and one. */
3613 const0_operand (register rtx op, enum machine_mode mode)
3615 return op == CONST0_RTX (mode);
3619 const1_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3621 return op == const1_rtx;
3624 /* Match 2, 4, or 8. Used for leal multiplicands. */
3627 const248_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3629 return (GET_CODE (op) == CONST_INT
3630 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3634 const_0_to_3_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3636 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 4);
3640 const_0_to_7_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3642 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 8);
3646 const_0_to_15_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3648 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 16);
3652 const_0_to_255_operand (register rtx op,
3653 enum machine_mode mode ATTRIBUTE_UNUSED)
3655 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 256);
3659 /* True if this is a constant appropriate for an increment or decrement. */
3662 incdec_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3664 /* On Pentium4, the inc and dec operations causes extra dependency on flag
3665 registers, since carry flag is not set. */
3666 if (TARGET_PENTIUM4 && !optimize_size)
3668 return op == const1_rtx || op == constm1_rtx;
3671 /* Return nonzero if OP is acceptable as operand of DImode shift
3675 shiftdi_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3678 return nonimmediate_operand (op, mode);
3680 return register_operand (op, mode);
3683 /* Return false if this is the stack pointer, or any other fake
3684 register eliminable to the stack pointer. Otherwise, this is
3687 This is used to prevent esp from being used as an index reg.
3688 Which would only happen in pathological cases. */
3691 reg_no_sp_operand (register rtx op, enum machine_mode mode)
3694 if (GET_CODE (t) == SUBREG)
3696 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3699 return register_operand (op, mode);
3703 mmx_reg_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3705 return MMX_REG_P (op);
3708 /* Return false if this is any eliminable register. Otherwise
3712 general_no_elim_operand (register rtx op, enum machine_mode mode)
3715 if (GET_CODE (t) == SUBREG)
3717 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3718 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3719 || t == virtual_stack_dynamic_rtx)
3722 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3723 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3726 return general_operand (op, mode);
3729 /* Return false if this is any eliminable register. Otherwise
3730 register_operand or const_int. */
3733 nonmemory_no_elim_operand (register rtx op, enum machine_mode mode)
3736 if (GET_CODE (t) == SUBREG)
3738 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3739 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3740 || t == virtual_stack_dynamic_rtx)
3743 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3746 /* Return false if this is any eliminable register or stack register,
3747 otherwise work like register_operand. */
3750 index_register_operand (register rtx op, enum machine_mode mode)
3753 if (GET_CODE (t) == SUBREG)
3757 if (t == arg_pointer_rtx
3758 || t == frame_pointer_rtx
3759 || t == virtual_incoming_args_rtx
3760 || t == virtual_stack_vars_rtx
3761 || t == virtual_stack_dynamic_rtx
3762 || REGNO (t) == STACK_POINTER_REGNUM)
3765 return general_operand (op, mode);
3768 /* Return true if op is a Q_REGS class register. */
3771 q_regs_operand (register rtx op, enum machine_mode mode)
3773 if (mode != VOIDmode && GET_MODE (op) != mode)
3775 if (GET_CODE (op) == SUBREG)
3776 op = SUBREG_REG (op);
3777 return ANY_QI_REG_P (op);
3780 /* Return true if op is an flags register. */
3783 flags_reg_operand (register rtx op, enum machine_mode mode)
3785 if (mode != VOIDmode && GET_MODE (op) != mode)
3787 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3790 /* Return true if op is a NON_Q_REGS class register. */
3793 non_q_regs_operand (register rtx op, enum machine_mode mode)
3795 if (mode != VOIDmode && GET_MODE (op) != mode)
3797 if (GET_CODE (op) == SUBREG)
3798 op = SUBREG_REG (op);
3799 return NON_QI_REG_P (op);
3803 zero_extended_scalar_load_operand (rtx op,
3804 enum machine_mode mode ATTRIBUTE_UNUSED)
3807 if (GET_CODE (op) != MEM)
3809 op = maybe_get_pool_constant (op);
3812 if (GET_CODE (op) != CONST_VECTOR)
3815 (GET_MODE_SIZE (GET_MODE (op)) /
3816 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
3817 for (n_elts--; n_elts > 0; n_elts--)
3819 rtx elt = CONST_VECTOR_ELT (op, n_elts);
3820 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
3826 /* Return 1 when OP is operand acceptable for standard SSE move. */
3828 vector_move_operand (rtx op, enum machine_mode mode)
3830 if (nonimmediate_operand (op, mode))
3832 if (GET_MODE (op) != mode && mode != VOIDmode)
3834 return (op == CONST0_RTX (GET_MODE (op)));
3837 /* Return true if op if a valid address, and does not contain
3838 a segment override. */
3841 no_seg_address_operand (register rtx op, enum machine_mode mode)
3843 struct ix86_address parts;
3845 if (! address_operand (op, mode))
3848 if (! ix86_decompose_address (op, &parts))
3851 return parts.seg == SEG_DEFAULT;
3854 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3857 sse_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3859 enum rtx_code code = GET_CODE (op);
3862 /* Operations supported directly. */
3872 /* These are equivalent to ones above in non-IEEE comparisons. */
3879 return !TARGET_IEEE_FP;
3884 /* Return 1 if OP is a valid comparison operator in valid mode. */
3886 ix86_comparison_operator (register rtx op, enum machine_mode mode)
3888 enum machine_mode inmode;
3889 enum rtx_code code = GET_CODE (op);
3890 if (mode != VOIDmode && GET_MODE (op) != mode)
3892 if (GET_RTX_CLASS (code) != '<')
3894 inmode = GET_MODE (XEXP (op, 0));
3896 if (inmode == CCFPmode || inmode == CCFPUmode)
3898 enum rtx_code second_code, bypass_code;
3899 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3900 return (bypass_code == NIL && second_code == NIL);
3907 if (inmode == CCmode || inmode == CCGCmode
3908 || inmode == CCGOCmode || inmode == CCNOmode)
3911 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3912 if (inmode == CCmode)
3916 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3924 /* Return 1 if OP is a valid comparison operator testing carry flag
3927 ix86_carry_flag_operator (register rtx op, enum machine_mode mode)
3929 enum machine_mode inmode;
3930 enum rtx_code code = GET_CODE (op);
3932 if (mode != VOIDmode && GET_MODE (op) != mode)
3934 if (GET_RTX_CLASS (code) != '<')
3936 inmode = GET_MODE (XEXP (op, 0));
3937 if (GET_CODE (XEXP (op, 0)) != REG
3938 || REGNO (XEXP (op, 0)) != 17
3939 || XEXP (op, 1) != const0_rtx)
3942 if (inmode == CCFPmode || inmode == CCFPUmode)
3944 enum rtx_code second_code, bypass_code;
3946 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3947 if (bypass_code != NIL || second_code != NIL)
3949 code = ix86_fp_compare_code_to_integer (code);
3951 else if (inmode != CCmode)
3956 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3959 fcmov_comparison_operator (register rtx op, enum machine_mode mode)
3961 enum machine_mode inmode;
3962 enum rtx_code code = GET_CODE (op);
3964 if (mode != VOIDmode && GET_MODE (op) != mode)
3966 if (GET_RTX_CLASS (code) != '<')
3968 inmode = GET_MODE (XEXP (op, 0));
3969 if (inmode == CCFPmode || inmode == CCFPUmode)
3971 enum rtx_code second_code, bypass_code;
3973 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3974 if (bypass_code != NIL || second_code != NIL)
3976 code = ix86_fp_compare_code_to_integer (code);
3978 /* i387 supports just limited amount of conditional codes. */
3981 case LTU: case GTU: case LEU: case GEU:
3982 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3985 case ORDERED: case UNORDERED:
3993 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3996 promotable_binary_operator (register rtx op,
3997 enum machine_mode mode ATTRIBUTE_UNUSED)
3999 switch (GET_CODE (op))
4002 /* Modern CPUs have same latency for HImode and SImode multiply,
4003 but 386 and 486 do HImode multiply faster. */
4004 return ix86_tune > PROCESSOR_I486;
4016 /* Nearly general operand, but accept any const_double, since we wish
4017 to be able to drop them into memory rather than have them get pulled
4021 cmp_fp_expander_operand (register rtx op, enum machine_mode mode)
4023 if (mode != VOIDmode && mode != GET_MODE (op))
4025 if (GET_CODE (op) == CONST_DOUBLE)
4027 return general_operand (op, mode);
4030 /* Match an SI or HImode register for a zero_extract. */
4033 ext_register_operand (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4036 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
4037 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
4040 if (!register_operand (op, VOIDmode))
4043 /* Be careful to accept only registers having upper parts. */
4044 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
4045 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
4048 /* Return 1 if this is a valid binary floating-point operation.
4049 OP is the expression matched, and MODE is its mode. */
4052 binary_fp_operator (register rtx op, enum machine_mode mode)
4054 if (mode != VOIDmode && mode != GET_MODE (op))
4057 switch (GET_CODE (op))
4063 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
4071 mult_operator (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4073 return GET_CODE (op) == MULT;
4077 div_operator (register rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4079 return GET_CODE (op) == DIV;
4083 arith_or_logical_operator (rtx op, enum machine_mode mode)
4085 return ((mode == VOIDmode || GET_MODE (op) == mode)
4086 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
4087 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
4090 /* Returns 1 if OP is memory operand with a displacement. */
4093 memory_displacement_operand (register rtx op, enum machine_mode mode)
4095 struct ix86_address parts;
4097 if (! memory_operand (op, mode))
4100 if (! ix86_decompose_address (XEXP (op, 0), &parts))
4103 return parts.disp != NULL_RTX;
4106 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
4107 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4109 ??? It seems likely that this will only work because cmpsi is an
4110 expander, and no actual insns use this. */
4113 cmpsi_operand (rtx op, enum machine_mode mode)
4115 if (nonimmediate_operand (op, mode))
4118 if (GET_CODE (op) == AND
4119 && GET_MODE (op) == SImode
4120 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
4121 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
4122 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
4123 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
4124 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
4125 && GET_CODE (XEXP (op, 1)) == CONST_INT)
4131 /* Returns 1 if OP is memory operand that can not be represented by the
4135 long_memory_operand (register rtx op, enum machine_mode mode)
4137 if (! memory_operand (op, mode))
4140 return memory_address_length (op) != 0;
4143 /* Return nonzero if the rtx is known aligned. */
4146 aligned_operand (rtx op, enum machine_mode mode)
4148 struct ix86_address parts;
4150 if (!general_operand (op, mode))
4153 /* Registers and immediate operands are always "aligned". */
4154 if (GET_CODE (op) != MEM)
4157 /* Don't even try to do any aligned optimizations with volatiles. */
4158 if (MEM_VOLATILE_P (op))
4163 /* Pushes and pops are only valid on the stack pointer. */
4164 if (GET_CODE (op) == PRE_DEC
4165 || GET_CODE (op) == POST_INC)
4168 /* Decode the address. */
4169 if (! ix86_decompose_address (op, &parts))
4172 if (parts.base && GET_CODE (parts.base) == SUBREG)
4173 parts.base = SUBREG_REG (parts.base);
4174 if (parts.index && GET_CODE (parts.index) == SUBREG)
4175 parts.index = SUBREG_REG (parts.index);
4177 /* Look for some component that isn't known to be aligned. */
4181 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
4186 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
4191 if (GET_CODE (parts.disp) != CONST_INT
4192 || (INTVAL (parts.disp) & 3) != 0)
4196 /* Didn't find one -- this must be an aligned address. */
4200 /* Initialize the table of extra 80387 mathematical constants. */
4203 init_ext_80387_constants (void)
4205 static const char * cst[5] =
4207 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4208 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4209 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4210 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4211 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4215 for (i = 0; i < 5; i++)
4217 real_from_string (&ext_80387_constants_table[i], cst[i]);
4218 /* Ensure each constant is rounded to XFmode precision. */
4219 real_convert (&ext_80387_constants_table[i],
4220 TARGET_128BIT_LONG_DOUBLE ? TFmode : XFmode,
4221 &ext_80387_constants_table[i]);
4224 ext_80387_constants_init = 1;
4227 /* Return true if the constant is something that can be loaded with
4228 a special instruction. */
4231 standard_80387_constant_p (rtx x)
4233 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4236 if (x == CONST0_RTX (GET_MODE (x)))
4238 if (x == CONST1_RTX (GET_MODE (x)))
4241 /* For XFmode constants, try to find a special 80387 instruction on
4242 those CPUs that benefit from them. */
4243 if ((GET_MODE (x) == XFmode || GET_MODE (x) == TFmode)
4244 && x86_ext_80387_constants & TUNEMASK)
4249 if (! ext_80387_constants_init)
4250 init_ext_80387_constants ();
4252 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4253 for (i = 0; i < 5; i++)
4254 if (real_identical (&r, &ext_80387_constants_table[i]))
4261 /* Return the opcode of the special instruction to be used to load
4265 standard_80387_constant_opcode (rtx x)
4267 switch (standard_80387_constant_p (x))
4287 /* Return the CONST_DOUBLE representing the 80387 constant that is
4288 loaded by the specified special instruction. The argument IDX
4289 matches the return value from standard_80387_constant_p. */
4292 standard_80387_constant_rtx (int idx)
4296 if (! ext_80387_constants_init)
4297 init_ext_80387_constants ();
4313 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4314 TARGET_128BIT_LONG_DOUBLE ? TFmode : XFmode);
4317 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4320 standard_sse_constant_p (rtx x)
4322 if (x == const0_rtx)
4324 return (x == CONST0_RTX (GET_MODE (x)));
4327 /* Returns 1 if OP contains a symbol reference */
4330 symbolic_reference_mentioned_p (rtx op)
4332 register const char *fmt;
4335 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4338 fmt = GET_RTX_FORMAT (GET_CODE (op));
4339 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4345 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4346 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4350 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4357 /* Return 1 if it is appropriate to emit `ret' instructions in the
4358 body of a function. Do this only if the epilogue is simple, needing a
4359 couple of insns. Prior to reloading, we can't tell how many registers
4360 must be saved, so return 0 then. Return 0 if there is no frame
4361 marker to de-allocate.
4363 If NON_SAVING_SETJMP is defined and true, then it is not possible
4364 for the epilogue to be simple, so return 0. This is a special case
4365 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4366 until final, but jump_optimize may need to know sooner if a
4370 ix86_can_use_return_insn_p (void)
4372 struct ix86_frame frame;
4374 #ifdef NON_SAVING_SETJMP
4375 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4379 if (! reload_completed || frame_pointer_needed)
4382 /* Don't allow more than 32 pop, since that's all we can do
4383 with one instruction. */
4384 if (current_function_pops_args
4385 && current_function_args_size >= 32768)
4388 ix86_compute_frame_layout (&frame);
4389 return frame.to_allocate == 0 && frame.nregs == 0;
4392 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
4394 x86_64_sign_extended_value (rtx value)
4396 switch (GET_CODE (value))
4398 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
4399 to be at least 32 and this all acceptable constants are
4400 represented as CONST_INT. */
4402 if (HOST_BITS_PER_WIDE_INT == 32)
4406 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4407 return trunc_int_for_mode (val, SImode) == val;
4411 /* For certain code models, the symbolic references are known to fit.
4412 in CM_SMALL_PIC model we know it fits if it is local to the shared
4413 library. Don't count TLS SYMBOL_REFs here, since they should fit
4414 only if inside of UNSPEC handled below. */
4416 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4418 /* For certain code models, the code is near as well. */
4420 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4421 || ix86_cmodel == CM_KERNEL);
4423 /* We also may accept the offsetted memory references in certain special
4426 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4427 switch (XINT (XEXP (value, 0), 1))
4429 case UNSPEC_GOTPCREL:
4431 case UNSPEC_GOTNTPOFF:
4437 if (GET_CODE (XEXP (value, 0)) == PLUS)
4439 rtx op1 = XEXP (XEXP (value, 0), 0);
4440 rtx op2 = XEXP (XEXP (value, 0), 1);
4441 HOST_WIDE_INT offset;
4443 if (ix86_cmodel == CM_LARGE)
4445 if (GET_CODE (op2) != CONST_INT)
4447 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4448 switch (GET_CODE (op1))
4451 /* For CM_SMALL assume that latest object is 16MB before
4452 end of 31bits boundary. We may also accept pretty
4453 large negative constants knowing that all objects are
4454 in the positive half of address space. */
4455 if (ix86_cmodel == CM_SMALL
4456 && offset < 16*1024*1024
4457 && trunc_int_for_mode (offset, SImode) == offset)
4459 /* For CM_KERNEL we know that all object resist in the
4460 negative half of 32bits address space. We may not
4461 accept negative offsets, since they may be just off
4462 and we may accept pretty large positive ones. */
4463 if (ix86_cmodel == CM_KERNEL
4465 && trunc_int_for_mode (offset, SImode) == offset)
4469 /* These conditions are similar to SYMBOL_REF ones, just the
4470 constraints for code models differ. */
4471 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4472 && offset < 16*1024*1024
4473 && trunc_int_for_mode (offset, SImode) == offset)
4475 if (ix86_cmodel == CM_KERNEL
4477 && trunc_int_for_mode (offset, SImode) == offset)
4481 switch (XINT (op1, 1))
4486 && trunc_int_for_mode (offset, SImode) == offset)
4500 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
4502 x86_64_zero_extended_value (rtx value)
4504 switch (GET_CODE (value))
4507 if (HOST_BITS_PER_WIDE_INT == 32)
4508 return (GET_MODE (value) == VOIDmode
4509 && !CONST_DOUBLE_HIGH (value));
4513 if (HOST_BITS_PER_WIDE_INT == 32)
4514 return INTVAL (value) >= 0;
4516 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4519 /* For certain code models, the symbolic references are known to fit. */
4521 return ix86_cmodel == CM_SMALL;
4523 /* For certain code models, the code is near as well. */
4525 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4527 /* We also may accept the offsetted memory references in certain special
4530 if (GET_CODE (XEXP (value, 0)) == PLUS)
4532 rtx op1 = XEXP (XEXP (value, 0), 0);
4533 rtx op2 = XEXP (XEXP (value, 0), 1);
4535 if (ix86_cmodel == CM_LARGE)
4537 switch (GET_CODE (op1))
4541 /* For small code model we may accept pretty large positive
4542 offsets, since one bit is available for free. Negative
4543 offsets are limited by the size of NULL pointer area
4544 specified by the ABI. */
4545 if (ix86_cmodel == CM_SMALL
4546 && GET_CODE (op2) == CONST_INT
4547 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4548 && (trunc_int_for_mode (INTVAL (op2), SImode)
4551 /* ??? For the kernel, we may accept adjustment of
4552 -0x10000000, since we know that it will just convert
4553 negative address space to positive, but perhaps this
4554 is not worthwhile. */
4557 /* These conditions are similar to SYMBOL_REF ones, just the
4558 constraints for code models differ. */
4559 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4560 && GET_CODE (op2) == CONST_INT
4561 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4562 && (trunc_int_for_mode (INTVAL (op2), SImode)
4576 /* Value should be nonzero if functions must have frame pointers.
4577 Zero means the frame pointer need not be set up (and parms may
4578 be accessed via the stack pointer) in functions that seem suitable. */
4581 ix86_frame_pointer_required (void)
4583 /* If we accessed previous frames, then the generated code expects
4584 to be able to access the saved ebp value in our frame. */
4585 if (cfun->machine->accesses_prev_frame)
4588 /* Several x86 os'es need a frame pointer for other reasons,
4589 usually pertaining to setjmp. */
4590 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4593 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4594 the frame pointer by default. Turn it back on now if we've not
4595 got a leaf function. */
4596 if (TARGET_OMIT_LEAF_FRAME_POINTER
4597 && (!current_function_is_leaf))
4600 if (current_function_profile)
4606 /* Record that the current function accesses previous call frames. */
4609 ix86_setup_frame_addresses (void)
4611 cfun->machine->accesses_prev_frame = 1;
4614 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4615 # define USE_HIDDEN_LINKONCE 1
4617 # define USE_HIDDEN_LINKONCE 0
4620 static int pic_labels_used;
4622 /* Fills in the label name that should be used for a pc thunk for
4623 the given register. */
4626 get_pc_thunk_name (char name[32], unsigned int regno)
4628 if (USE_HIDDEN_LINKONCE)
4629 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4631 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4635 /* This function generates code for -fpic that loads %ebx with
4636 the return address of the caller and then returns. */
4639 ix86_file_end (void)
4644 for (regno = 0; regno < 8; ++regno)
4648 if (! ((pic_labels_used >> regno) & 1))
4651 get_pc_thunk_name (name, regno);
4653 if (USE_HIDDEN_LINKONCE)
4657 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4659 TREE_PUBLIC (decl) = 1;
4660 TREE_STATIC (decl) = 1;
4661 DECL_ONE_ONLY (decl) = 1;
4663 (*targetm.asm_out.unique_section) (decl, 0);
4664 named_section (decl, NULL, 0);
4666 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4667 fputs ("\t.hidden\t", asm_out_file);
4668 assemble_name (asm_out_file, name);
4669 fputc ('\n', asm_out_file);
4670 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4675 ASM_OUTPUT_LABEL (asm_out_file, name);
4678 xops[0] = gen_rtx_REG (SImode, regno);
4679 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4680 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4681 output_asm_insn ("ret", xops);
4684 if (NEED_INDICATE_EXEC_STACK)
4685 file_end_indicate_exec_stack ();
4688 /* Emit code for the SET_GOT patterns. */
4691 output_set_got (rtx dest)
4696 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4698 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4700 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4703 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4705 output_asm_insn ("call\t%a2", xops);
4708 /* Output the "canonical" label name ("Lxx$pb") here too. This
4709 is what will be referred to by the Mach-O PIC subsystem. */
4710 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4712 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4713 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4716 output_asm_insn ("pop{l}\t%0", xops);
4721 get_pc_thunk_name (name, REGNO (dest));
4722 pic_labels_used |= 1 << REGNO (dest);
4724 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4725 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4726 output_asm_insn ("call\t%X2", xops);
4729 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4730 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4731 else if (!TARGET_MACHO)
4732 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4737 /* Generate an "push" pattern for input ARG. */
4742 return gen_rtx_SET (VOIDmode,
4744 gen_rtx_PRE_DEC (Pmode,
4745 stack_pointer_rtx)),
4749 /* Return >= 0 if there is an unused call-clobbered register available
4750 for the entire function. */
4753 ix86_select_alt_pic_regnum (void)
4755 if (current_function_is_leaf && !current_function_profile)
4758 for (i = 2; i >= 0; --i)
4759 if (!regs_ever_live[i])
4763 return INVALID_REGNUM;
4766 /* Return 1 if we need to save REGNO. */
4768 ix86_save_reg (unsigned int regno, int maybe_eh_return)
4770 if (pic_offset_table_rtx
4771 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4772 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4773 || current_function_profile
4774 || current_function_calls_eh_return
4775 || current_function_uses_const_pool))
4777 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4782 if (current_function_calls_eh_return && maybe_eh_return)
4787 unsigned test = EH_RETURN_DATA_REGNO (i);
4788 if (test == INVALID_REGNUM)
4795 return (regs_ever_live[regno]
4796 && !call_used_regs[regno]
4797 && !fixed_regs[regno]
4798 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4801 /* Return number of registers to be saved on the stack. */
4804 ix86_nsaved_regs (void)
4809 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4810 if (ix86_save_reg (regno, true))
4815 /* Return the offset between two registers, one to be eliminated, and the other
4816 its replacement, at the start of a routine. */
4819 ix86_initial_elimination_offset (int from, int to)
4821 struct ix86_frame frame;
4822 ix86_compute_frame_layout (&frame);
4824 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4825 return frame.hard_frame_pointer_offset;
4826 else if (from == FRAME_POINTER_REGNUM
4827 && to == HARD_FRAME_POINTER_REGNUM)
4828 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4831 if (to != STACK_POINTER_REGNUM)
4833 else if (from == ARG_POINTER_REGNUM)
4834 return frame.stack_pointer_offset;
4835 else if (from != FRAME_POINTER_REGNUM)
4838 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4842 /* Fill structure ix86_frame about frame of currently computed function. */
4845 ix86_compute_frame_layout (struct ix86_frame *frame)
4847 HOST_WIDE_INT total_size;
4848 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4850 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4851 HOST_WIDE_INT size = get_frame_size ();
4853 frame->nregs = ix86_nsaved_regs ();
4856 /* During reload iteration the amount of registers saved can change.
4857 Recompute the value as needed. Do not recompute when amount of registers
4858 didn't change as reload does mutiple calls to the function and does not
4859 expect the decision to change within single iteration. */
4861 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4863 int count = frame->nregs;
4865 cfun->machine->use_fast_prologue_epilogue_nregs = count;
4866 /* The fast prologue uses move instead of push to save registers. This
4867 is significantly longer, but also executes faster as modern hardware
4868 can execute the moves in parallel, but can't do that for push/pop.
4870 Be careful about choosing what prologue to emit: When function takes
4871 many instructions to execute we may use slow version as well as in
4872 case function is known to be outside hot spot (this is known with
4873 feedback only). Weight the size of function by number of registers
4874 to save as it is cheap to use one or two push instructions but very
4875 slow to use many of them. */
4877 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4878 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4879 || (flag_branch_probabilities
4880 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4881 cfun->machine->use_fast_prologue_epilogue = false;
4883 cfun->machine->use_fast_prologue_epilogue
4884 = !expensive_function_p (count);
4886 if (TARGET_PROLOGUE_USING_MOVE
4887 && cfun->machine->use_fast_prologue_epilogue)
4888 frame->save_regs_using_mov = true;
4890 frame->save_regs_using_mov = false;
4893 /* Skip return address and saved base pointer. */
4894 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4896 frame->hard_frame_pointer_offset = offset;
4898 /* Do some sanity checking of stack_alignment_needed and
4899 preferred_alignment, since i386 port is the only using those features
4900 that may break easily. */
4902 if (size && !stack_alignment_needed)
4904 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4906 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4908 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4911 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4912 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4914 /* Register save area */
4915 offset += frame->nregs * UNITS_PER_WORD;
4918 if (ix86_save_varrargs_registers)
4920 offset += X86_64_VARARGS_SIZE;
4921 frame->va_arg_size = X86_64_VARARGS_SIZE;
4924 frame->va_arg_size = 0;
4926 /* Align start of frame for local function. */
4927 frame->padding1 = ((offset + stack_alignment_needed - 1)
4928 & -stack_alignment_needed) - offset;
4930 offset += frame->padding1;
4932 /* Frame pointer points here. */
4933 frame->frame_pointer_offset = offset;
4937 /* Add outgoing arguments area. Can be skipped if we eliminated
4938 all the function calls as dead code. */
4939 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4941 offset += current_function_outgoing_args_size;
4942 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4945 frame->outgoing_arguments_size = 0;
4947 /* Align stack boundary. Only needed if we're calling another function
4949 if (!current_function_is_leaf || current_function_calls_alloca)
4950 frame->padding2 = ((offset + preferred_alignment - 1)
4951 & -preferred_alignment) - offset;
4953 frame->padding2 = 0;
4955 offset += frame->padding2;
4957 /* We've reached end of stack frame. */
4958 frame->stack_pointer_offset = offset;
4960 /* Size prologue needs to allocate. */
4961 frame->to_allocate =
4962 (size + frame->padding1 + frame->padding2
4963 + frame->outgoing_arguments_size + frame->va_arg_size);
4965 if (!frame->to_allocate && frame->nregs <= 1)
4966 frame->save_regs_using_mov = false;
4968 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
4969 && current_function_is_leaf)
4971 frame->red_zone_size = frame->to_allocate;
4972 if (frame->save_regs_using_mov)
4973 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
4974 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4975 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4978 frame->red_zone_size = 0;
4979 frame->to_allocate -= frame->red_zone_size;
4980 frame->stack_pointer_offset -= frame->red_zone_size;
4982 fprintf (stderr, "nregs: %i\n", frame->nregs);
4983 fprintf (stderr, "size: %i\n", size);
4984 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4985 fprintf (stderr, "padding1: %i\n", frame->padding1);
4986 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4987 fprintf (stderr, "padding2: %i\n", frame->padding2);
4988 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4989 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4990 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4991 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4992 frame->hard_frame_pointer_offset);
4993 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4997 /* Emit code to save registers in the prologue. */
5000 ix86_emit_save_regs (void)
5005 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5006 if (ix86_save_reg (regno, true))
5008 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5009 RTX_FRAME_RELATED_P (insn) = 1;
5013 /* Emit code to save registers using MOV insns. First register
5014 is restored from POINTER + OFFSET. */
5016 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5021 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5022 if (ix86_save_reg (regno, true))
5024 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5026 gen_rtx_REG (Pmode, regno));
5027 RTX_FRAME_RELATED_P (insn) = 1;
5028 offset += UNITS_PER_WORD;
5032 /* Expand the prologue into a bunch of separate insns. */
5035 ix86_expand_prologue (void)
5039 struct ix86_frame frame;
5040 HOST_WIDE_INT allocate;
5042 ix86_compute_frame_layout (&frame);
5044 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5045 slower on all targets. Also sdb doesn't like it. */
5047 if (frame_pointer_needed)
5049 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5050 RTX_FRAME_RELATED_P (insn) = 1;
5052 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5053 RTX_FRAME_RELATED_P (insn) = 1;
5056 allocate = frame.to_allocate;
5058 if (!frame.save_regs_using_mov)
5059 ix86_emit_save_regs ();
5061 allocate += frame.nregs * UNITS_PER_WORD;
5063 /* When using red zone we may start register saving before allocating
5064 the stack frame saving one cycle of the prologue. */
5065 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5066 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5067 : stack_pointer_rtx,
5068 -frame.nregs * UNITS_PER_WORD);
5072 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5074 insn = emit_insn (gen_pro_epilogue_adjust_stack
5075 (stack_pointer_rtx, stack_pointer_rtx,
5076 GEN_INT (-allocate)));
5077 RTX_FRAME_RELATED_P (insn) = 1;
5081 /* ??? Is this only valid for Win32? */
5088 arg0 = gen_rtx_REG (SImode, 0);
5089 emit_move_insn (arg0, GEN_INT (allocate));
5091 sym = gen_rtx_MEM (FUNCTION_MODE,
5092 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
5093 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
5095 CALL_INSN_FUNCTION_USAGE (insn)
5096 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
5097 CALL_INSN_FUNCTION_USAGE (insn));
5099 /* Don't allow scheduling pass to move insns across __alloca
5101 emit_insn (gen_blockage (const0_rtx));
5103 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5105 if (!frame_pointer_needed || !frame.to_allocate)
5106 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5108 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5109 -frame.nregs * UNITS_PER_WORD);
5112 pic_reg_used = false;
5113 if (pic_offset_table_rtx
5114 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5115 || current_function_profile))
5117 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5119 if (alt_pic_reg_used != INVALID_REGNUM)
5120 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5122 pic_reg_used = true;
5127 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5129 /* Even with accurate pre-reload life analysis, we can wind up
5130 deleting all references to the pic register after reload.
5131 Consider if cross-jumping unifies two sides of a branch
5132 controlled by a comparison vs the only read from a global.
5133 In which case, allow the set_got to be deleted, though we're
5134 too late to do anything about the ebx save in the prologue. */
5135 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5138 /* Prevent function calls from be scheduled before the call to mcount.
5139 In the pic_reg_used case, make sure that the got load isn't deleted. */
5140 if (current_function_profile)
5141 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5144 /* Emit code to restore saved registers using MOV insns. First register
5145 is restored from POINTER + OFFSET. */
5147 ix86_emit_restore_regs_using_mov (rtx pointer, int offset, int maybe_eh_return)
5151 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5152 if (ix86_save_reg (regno, maybe_eh_return))
5154 emit_move_insn (gen_rtx_REG (Pmode, regno),
5155 adjust_address (gen_rtx_MEM (Pmode, pointer),
5157 offset += UNITS_PER_WORD;
5161 /* Restore function stack, frame, and registers. */
5164 ix86_expand_epilogue (int style)
5167 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5168 struct ix86_frame frame;
5169 HOST_WIDE_INT offset;
5171 ix86_compute_frame_layout (&frame);
5173 /* Calculate start of saved registers relative to ebp. Special care
5174 must be taken for the normal return case of a function using
5175 eh_return: the eax and edx registers are marked as saved, but not
5176 restored along this path. */
5177 offset = frame.nregs;
5178 if (current_function_calls_eh_return && style != 2)
5180 offset *= -UNITS_PER_WORD;
5182 /* If we're only restoring one register and sp is not valid then
5183 using a move instruction to restore the register since it's
5184 less work than reloading sp and popping the register.
5186 The default code result in stack adjustment using add/lea instruction,
5187 while this code results in LEAVE instruction (or discrete equivalent),
5188 so it is profitable in some other cases as well. Especially when there
5189 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5190 and there is exactly one register to pop. This heuristic may need some
5191 tuning in future. */
5192 if ((!sp_valid && frame.nregs <= 1)
5193 || (TARGET_EPILOGUE_USING_MOVE
5194 && cfun->machine->use_fast_prologue_epilogue
5195 && (frame.nregs > 1 || frame.to_allocate))
5196 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5197 || (frame_pointer_needed && TARGET_USE_LEAVE
5198 && cfun->machine->use_fast_prologue_epilogue
5199 && frame.nregs == 1)
5200 || current_function_calls_eh_return)
5202 /* Restore registers. We can use ebp or esp to address the memory
5203 locations. If both are available, default to ebp, since offsets
5204 are known to be small. Only exception is esp pointing directly to the
5205 end of block of saved registers, where we may simplify addressing
5208 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5209 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5210 frame.to_allocate, style == 2);
5212 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5213 offset, style == 2);
5215 /* eh_return epilogues need %ecx added to the stack pointer. */
5218 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5220 if (frame_pointer_needed)
5222 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5223 tmp = plus_constant (tmp, UNITS_PER_WORD);
5224 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5226 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5227 emit_move_insn (hard_frame_pointer_rtx, tmp);
5229 emit_insn (gen_pro_epilogue_adjust_stack
5230 (stack_pointer_rtx, sa, const0_rtx));
5234 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5235 tmp = plus_constant (tmp, (frame.to_allocate
5236 + frame.nregs * UNITS_PER_WORD));
5237 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5240 else if (!frame_pointer_needed)
5241 emit_insn (gen_pro_epilogue_adjust_stack
5242 (stack_pointer_rtx, stack_pointer_rtx,
5243 GEN_INT (frame.to_allocate
5244 + frame.nregs * UNITS_PER_WORD)));
5245 /* If not an i386, mov & pop is faster than "leave". */
5246 else if (TARGET_USE_LEAVE || optimize_size
5247 || !cfun->machine->use_fast_prologue_epilogue)
5248 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5251 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
5252 hard_frame_pointer_rtx,
5255 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5257 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5262 /* First step is to deallocate the stack frame so that we can
5263 pop the registers. */
5266 if (!frame_pointer_needed)
5268 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
5269 hard_frame_pointer_rtx,
5272 else if (frame.to_allocate)
5273 emit_insn (gen_pro_epilogue_adjust_stack
5274 (stack_pointer_rtx, stack_pointer_rtx,
5275 GEN_INT (frame.to_allocate)));
5277 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5278 if (ix86_save_reg (regno, false))
5281 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5283 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5285 if (frame_pointer_needed)
5287 /* Leave results in shorter dependency chains on CPUs that are
5288 able to grok it fast. */
5289 if (TARGET_USE_LEAVE)
5290 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5291 else if (TARGET_64BIT)
5292 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5294 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5298 /* Sibcall epilogues don't want a return instruction. */
5302 if (current_function_pops_args && current_function_args_size)
5304 rtx popc = GEN_INT (current_function_pops_args);
5306 /* i386 can only pop 64K bytes. If asked to pop more, pop
5307 return address, do explicit add, and jump indirectly to the
5310 if (current_function_pops_args >= 65536)
5312 rtx ecx = gen_rtx_REG (SImode, 2);
5314 /* There are is no "pascal" calling convention in 64bit ABI. */
5318 emit_insn (gen_popsi1 (ecx));
5319 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5320 emit_jump_insn (gen_return_indirect_internal (ecx));
5323 emit_jump_insn (gen_return_pop_internal (popc));
5326 emit_jump_insn (gen_return_internal ());
5329 /* Reset from the function's potential modifications. */
5332 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5333 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5335 if (pic_offset_table_rtx)
5336 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5339 /* Extract the parts of an RTL expression that is a valid memory address
5340 for an instruction. Return 0 if the structure of the address is
5341 grossly off. Return -1 if the address contains ASHIFT, so it is not
5342 strictly valid, but still used for computing length of lea instruction. */
5345 ix86_decompose_address (register rtx addr, struct ix86_address *out)
5347 rtx base = NULL_RTX;
5348 rtx index = NULL_RTX;
5349 rtx disp = NULL_RTX;
5350 HOST_WIDE_INT scale = 1;
5351 rtx scale_rtx = NULL_RTX;
5353 enum ix86_address_seg seg = SEG_DEFAULT;
5355 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
5357 else if (GET_CODE (addr) == PLUS)
5367 addends[n++] = XEXP (op, 1);
5370 while (GET_CODE (op) == PLUS);
5375 for (i = n; i >= 0; --i)
5378 switch (GET_CODE (op))
5383 index = XEXP (op, 0);
5384 scale_rtx = XEXP (op, 1);
5388 if (XINT (op, 1) == UNSPEC_TP
5389 && TARGET_TLS_DIRECT_SEG_REFS
5390 && seg == SEG_DEFAULT)
5391 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5420 else if (GET_CODE (addr) == MULT)
5422 index = XEXP (addr, 0); /* index*scale */
5423 scale_rtx = XEXP (addr, 1);
5425 else if (GET_CODE (addr) == ASHIFT)
5429 /* We're called for lea too, which implements ashift on occasion. */
5430 index = XEXP (addr, 0);
5431 tmp = XEXP (addr, 1);
5432 if (GET_CODE (tmp) != CONST_INT)
5434 scale = INTVAL (tmp);
5435 if ((unsigned HOST_WIDE_INT) scale > 3)
5441 disp = addr; /* displacement */
5443 /* Extract the integral value of scale. */
5446 if (GET_CODE (scale_rtx) != CONST_INT)
5448 scale = INTVAL (scale_rtx);
5451 /* Allow arg pointer and stack pointer as index if there is not scaling. */
5452 if (base && index && scale == 1
5453 && (index == arg_pointer_rtx
5454 || index == frame_pointer_rtx
5455 || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
5462 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5463 if ((base == hard_frame_pointer_rtx
5464 || base == frame_pointer_rtx
5465 || base == arg_pointer_rtx) && !disp)
5468 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5469 Avoid this by transforming to [%esi+0]. */
5470 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5471 && base && !index && !disp
5473 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5476 /* Special case: encode reg+reg instead of reg*2. */
5477 if (!base && index && scale && scale == 2)
5478 base = index, scale = 1;
5480 /* Special case: scaling cannot be encoded without base or displacement. */
5481 if (!base && !disp && index && scale != 1)
5493 /* Return cost of the memory address x.
5494 For i386, it is better to use a complex address than let gcc copy
5495 the address into a reg and make a new pseudo. But not if the address
5496 requires to two regs - that would mean more pseudos with longer
5499 ix86_address_cost (rtx x)
5501 struct ix86_address parts;
5504 if (!ix86_decompose_address (x, &parts))
5507 if (parts.base && GET_CODE (parts.base) == SUBREG)
5508 parts.base = SUBREG_REG (parts.base);
5509 if (parts.index && GET_CODE (parts.index) == SUBREG)
5510 parts.index = SUBREG_REG (parts.index);
5512 /* More complex memory references are better. */
5513 if (parts.disp && parts.disp != const0_rtx)
5515 if (parts.seg != SEG_DEFAULT)
5518 /* Attempt to minimize number of registers in the address. */
5520 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5522 && (!REG_P (parts.index)
5523 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5527 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5529 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5530 && parts.base != parts.index)
5533 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
5534 since it's predecode logic can't detect the length of instructions
5535 and it degenerates to vector decoded. Increase cost of such
5536 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5537 to split such addresses or even refuse such addresses at all.
5539 Following addressing modes are affected:
5544 The first and last case may be avoidable by explicitly coding the zero in
5545 memory address, but I don't have AMD-K6 machine handy to check this
5549 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5550 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5551 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5557 /* If X is a machine specific address (i.e. a symbol or label being
5558 referenced as a displacement from the GOT implemented using an
5559 UNSPEC), then return the base term. Otherwise return X. */
5562 ix86_find_base_term (rtx x)
5568 if (GET_CODE (x) != CONST)
5571 if (GET_CODE (term) == PLUS
5572 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5573 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5574 term = XEXP (term, 0);
5575 if (GET_CODE (term) != UNSPEC
5576 || XINT (term, 1) != UNSPEC_GOTPCREL)
5579 term = XVECEXP (term, 0, 0);
5581 if (GET_CODE (term) != SYMBOL_REF
5582 && GET_CODE (term) != LABEL_REF)
5588 term = ix86_delegitimize_address (x);
5590 if (GET_CODE (term) != SYMBOL_REF
5591 && GET_CODE (term) != LABEL_REF)
5597 /* Determine if a given RTX is a valid constant. We already know this
5598 satisfies CONSTANT_P. */
5601 legitimate_constant_p (rtx x)
5605 switch (GET_CODE (x))
5608 /* TLS symbols are not constant. */
5609 if (tls_symbolic_operand (x, Pmode))
5614 inner = XEXP (x, 0);
5616 /* Offsets of TLS symbols are never valid.
5617 Discourage CSE from creating them. */
5618 if (GET_CODE (inner) == PLUS
5619 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5622 if (GET_CODE (inner) == PLUS)
5624 if (GET_CODE (XEXP (inner, 1)) != CONST_INT)
5626 inner = XEXP (inner, 0);
5629 /* Only some unspecs are valid as "constants". */
5630 if (GET_CODE (inner) == UNSPEC)
5631 switch (XINT (inner, 1))
5635 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5637 return local_dynamic_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5647 /* Otherwise we handle everything else in the move patterns. */
5651 /* Determine if it's legal to put X into the constant pool. This
5652 is not possible for the address of thread-local symbols, which
5653 is checked above. */
5656 ix86_cannot_force_const_mem (rtx x)
5658 return !legitimate_constant_p (x);
5661 /* Determine if a given RTX is a valid constant address. */
5664 constant_address_p (rtx x)
5666 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5669 /* Nonzero if the constant value X is a legitimate general operand
5670 when generating PIC code. It is given that flag_pic is on and
5671 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5674 legitimate_pic_operand_p (rtx x)
5678 switch (GET_CODE (x))
5681 inner = XEXP (x, 0);
5683 /* Only some unspecs are valid as "constants". */
5684 if (GET_CODE (inner) == UNSPEC)
5685 switch (XINT (inner, 1))
5688 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5696 return legitimate_pic_address_disp_p (x);
5703 /* Determine if a given CONST RTX is a valid memory displacement
5707 legitimate_pic_address_disp_p (register rtx disp)
5711 /* In 64bit mode we can allow direct addresses of symbols and labels
5712 when they are not dynamic symbols. */
5715 /* TLS references should always be enclosed in UNSPEC. */
5716 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5718 if (GET_CODE (disp) == SYMBOL_REF
5719 && ix86_cmodel == CM_SMALL_PIC
5720 && SYMBOL_REF_LOCAL_P (disp))
5722 if (GET_CODE (disp) == LABEL_REF)
5724 if (GET_CODE (disp) == CONST
5725 && GET_CODE (XEXP (disp, 0)) == PLUS)
5727 rtx op0 = XEXP (XEXP (disp, 0), 0);
5728 rtx op1 = XEXP (XEXP (disp, 0), 1);
5730 /* TLS references should always be enclosed in UNSPEC. */
5731 if (tls_symbolic_operand (op0, GET_MODE (op0)))
5733 if (((GET_CODE (op0) == SYMBOL_REF
5734 && ix86_cmodel == CM_SMALL_PIC
5735 && SYMBOL_REF_LOCAL_P (op0))
5736 || GET_CODE (op0) == LABEL_REF)
5737 && GET_CODE (op1) == CONST_INT
5738 && INTVAL (op1) < 16*1024*1024
5739 && INTVAL (op1) >= -16*1024*1024)
5743 if (GET_CODE (disp) != CONST)
5745 disp = XEXP (disp, 0);
5749 /* We are unsafe to allow PLUS expressions. This limit allowed distance
5750 of GOT tables. We should not need these anyway. */
5751 if (GET_CODE (disp) != UNSPEC
5752 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5755 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5756 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5762 if (GET_CODE (disp) == PLUS)
5764 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5766 disp = XEXP (disp, 0);
5770 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5771 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5773 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5774 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5775 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5777 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5778 if (! strcmp (sym_name, "<pic base>"))
5783 if (GET_CODE (disp) != UNSPEC)
5786 switch (XINT (disp, 1))
5791 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5793 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
5794 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
5795 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5797 case UNSPEC_GOTTPOFF:
5798 case UNSPEC_GOTNTPOFF:
5799 case UNSPEC_INDNTPOFF:
5802 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5804 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5806 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5812 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5813 memory address for an instruction. The MODE argument is the machine mode
5814 for the MEM expression that wants to use this address.
5816 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
5817 convert common non-canonical forms to canonical form so that they will
5821 legitimate_address_p (enum machine_mode mode, register rtx addr, int strict)
5823 struct ix86_address parts;
5824 rtx base, index, disp;
5825 HOST_WIDE_INT scale;
5826 const char *reason = NULL;
5827 rtx reason_rtx = NULL_RTX;
5829 if (TARGET_DEBUG_ADDR)
5832 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5833 GET_MODE_NAME (mode), strict);
5837 if (ix86_decompose_address (addr, &parts) <= 0)
5839 reason = "decomposition failed";
5844 index = parts.index;
5846 scale = parts.scale;
5848 /* Validate base register.
5850 Don't allow SUBREG's here, it can lead to spill failures when the base
5851 is one word out of a two word structure, which is represented internally
5859 if (GET_CODE (base) == SUBREG)
5860 reg = SUBREG_REG (base);
5864 if (GET_CODE (reg) != REG)
5866 reason = "base is not a register";
5870 if (GET_MODE (base) != Pmode)
5872 reason = "base is not in Pmode";
5876 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5877 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5879 reason = "base is not valid";
5884 /* Validate index register.
5886 Don't allow SUBREG's here, it can lead to spill failures when the index
5887 is one word out of a two word structure, which is represented internally
5895 if (GET_CODE (index) == SUBREG)
5896 reg = SUBREG_REG (index);
5900 if (GET_CODE (reg) != REG)
5902 reason = "index is not a register";
5906 if (GET_MODE (index) != Pmode)
5908 reason = "index is not in Pmode";
5912 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5913 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5915 reason = "index is not valid";
5920 /* Validate scale factor. */
5923 reason_rtx = GEN_INT (scale);
5926 reason = "scale without index";
5930 if (scale != 2 && scale != 4 && scale != 8)
5932 reason = "scale is not a valid multiplier";
5937 /* Validate displacement. */
5942 if (GET_CODE (disp) == CONST
5943 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5944 switch (XINT (XEXP (disp, 0), 1))
5948 case UNSPEC_GOTPCREL:
5951 goto is_legitimate_pic;
5953 case UNSPEC_GOTTPOFF:
5954 case UNSPEC_GOTNTPOFF:
5955 case UNSPEC_INDNTPOFF:
5961 reason = "invalid address unspec";
5965 else if (flag_pic && (SYMBOLIC_CONST (disp)
5967 && !machopic_operand_p (disp)
5972 if (TARGET_64BIT && (index || base))
5974 /* foo@dtpoff(%rX) is ok. */
5975 if (GET_CODE (disp) != CONST
5976 || GET_CODE (XEXP (disp, 0)) != PLUS
5977 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5978 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5979 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5980 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5982 reason = "non-constant pic memory reference";
5986 else if (! legitimate_pic_address_disp_p (disp))
5988 reason = "displacement is an invalid pic construct";
5992 /* This code used to verify that a symbolic pic displacement
5993 includes the pic_offset_table_rtx register.
5995 While this is good idea, unfortunately these constructs may
5996 be created by "adds using lea" optimization for incorrect
6005 This code is nonsensical, but results in addressing
6006 GOT table with pic_offset_table_rtx base. We can't
6007 just refuse it easily, since it gets matched by
6008 "addsi3" pattern, that later gets split to lea in the
6009 case output register differs from input. While this
6010 can be handled by separate addsi pattern for this case
6011 that never results in lea, this seems to be easier and
6012 correct fix for crash to disable this test. */
6014 else if (GET_CODE (disp) != LABEL_REF
6015 && GET_CODE (disp) != CONST_INT
6016 && (GET_CODE (disp) != CONST
6017 || !legitimate_constant_p (disp))
6018 && (GET_CODE (disp) != SYMBOL_REF
6019 || !legitimate_constant_p (disp)))
6021 reason = "displacement is not constant";
6024 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
6026 reason = "displacement is out of range";
6031 /* Everything looks valid. */
6032 if (TARGET_DEBUG_ADDR)
6033 fprintf (stderr, "Success.\n");
6037 if (TARGET_DEBUG_ADDR)
6039 fprintf (stderr, "Error: %s\n", reason);
6040 debug_rtx (reason_rtx);
6045 /* Return an unique alias set for the GOT. */
6047 static HOST_WIDE_INT
6048 ix86_GOT_alias_set (void)
6050 static HOST_WIDE_INT set = -1;
6052 set = new_alias_set ();
6056 /* Return a legitimate reference for ORIG (an address) using the
6057 register REG. If REG is 0, a new pseudo is generated.
6059 There are two types of references that must be handled:
6061 1. Global data references must load the address from the GOT, via
6062 the PIC reg. An insn is emitted to do this load, and the reg is
6065 2. Static data references, constant pool addresses, and code labels
6066 compute the address as an offset from the GOT, whose base is in
6067 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6068 differentiate them from global data objects. The returned
6069 address is the PIC reg + an unspec constant.
6071 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6072 reg also appears in the address. */
6075 legitimize_pic_address (rtx orig, rtx reg)
6083 reg = gen_reg_rtx (Pmode);
6084 /* Use the generic Mach-O PIC machinery. */
6085 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6088 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6090 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6092 /* This symbol may be referenced via a displacement from the PIC
6093 base address (@GOTOFF). */
6095 if (reload_in_progress)
6096 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6097 if (GET_CODE (addr) == CONST)
6098 addr = XEXP (addr, 0);
6099 if (GET_CODE (addr) == PLUS)
6101 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6102 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6105 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6106 new = gen_rtx_CONST (Pmode, new);
6107 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6111 emit_move_insn (reg, new);
6115 else if (GET_CODE (addr) == SYMBOL_REF)
6119 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6120 new = gen_rtx_CONST (Pmode, new);
6121 new = gen_rtx_MEM (Pmode, new);
6122 RTX_UNCHANGING_P (new) = 1;
6123 set_mem_alias_set (new, ix86_GOT_alias_set ());
6126 reg = gen_reg_rtx (Pmode);
6127 /* Use directly gen_movsi, otherwise the address is loaded
6128 into register for CSE. We don't want to CSE this addresses,
6129 instead we CSE addresses from the GOT table, so skip this. */
6130 emit_insn (gen_movsi (reg, new));
6135 /* This symbol must be referenced via a load from the
6136 Global Offset Table (@GOT). */
6138 if (reload_in_progress)
6139 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6140 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6141 new = gen_rtx_CONST (Pmode, new);
6142 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6143 new = gen_rtx_MEM (Pmode, new);
6144 RTX_UNCHANGING_P (new) = 1;
6145 set_mem_alias_set (new, ix86_GOT_alias_set ());
6148 reg = gen_reg_rtx (Pmode);
6149 emit_move_insn (reg, new);
6155 if (GET_CODE (addr) == CONST)
6157 addr = XEXP (addr, 0);
6159 /* We must match stuff we generate before. Assume the only
6160 unspecs that can get here are ours. Not that we could do
6161 anything with them anyway... */
6162 if (GET_CODE (addr) == UNSPEC
6163 || (GET_CODE (addr) == PLUS
6164 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6166 if (GET_CODE (addr) != PLUS)
6169 if (GET_CODE (addr) == PLUS)
6171 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6173 /* Check first to see if this is a constant offset from a @GOTOFF
6174 symbol reference. */
6175 if (local_symbolic_operand (op0, Pmode)
6176 && GET_CODE (op1) == CONST_INT)
6180 if (reload_in_progress)
6181 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6182 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6184 new = gen_rtx_PLUS (Pmode, new, op1);
6185 new = gen_rtx_CONST (Pmode, new);
6186 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6190 emit_move_insn (reg, new);
6196 if (INTVAL (op1) < -16*1024*1024
6197 || INTVAL (op1) >= 16*1024*1024)
6198 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
6203 base = legitimize_pic_address (XEXP (addr, 0), reg);
6204 new = legitimize_pic_address (XEXP (addr, 1),
6205 base == reg ? NULL_RTX : reg);
6207 if (GET_CODE (new) == CONST_INT)
6208 new = plus_constant (base, INTVAL (new));
6211 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6213 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6214 new = XEXP (new, 1);
6216 new = gen_rtx_PLUS (Pmode, base, new);
6224 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6227 get_thread_pointer (int to_reg)
6231 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6235 reg = gen_reg_rtx (Pmode);
6236 insn = gen_rtx_SET (VOIDmode, reg, tp);
6237 insn = emit_insn (insn);
6242 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6243 false if we expect this to be used for a memory address and true if
6244 we expect to load the address into a register. */
6247 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6249 rtx dest, base, off, pic;
6254 case TLS_MODEL_GLOBAL_DYNAMIC:
6255 dest = gen_reg_rtx (Pmode);
6258 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6261 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6262 insns = get_insns ();
6265 emit_libcall_block (insns, dest, rax, x);
6268 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6271 case TLS_MODEL_LOCAL_DYNAMIC:
6272 base = gen_reg_rtx (Pmode);
6275 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6278 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6279 insns = get_insns ();
6282 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6283 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6284 emit_libcall_block (insns, base, rax, note);
6287 emit_insn (gen_tls_local_dynamic_base_32 (base));
6289 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6290 off = gen_rtx_CONST (Pmode, off);
6292 return gen_rtx_PLUS (Pmode, base, off);
6294 case TLS_MODEL_INITIAL_EXEC:
6298 type = UNSPEC_GOTNTPOFF;
6302 if (reload_in_progress)
6303 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6304 pic = pic_offset_table_rtx;
6305 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6307 else if (!TARGET_GNU_TLS)
6309 pic = gen_reg_rtx (Pmode);
6310 emit_insn (gen_set_got (pic));
6311 type = UNSPEC_GOTTPOFF;
6316 type = UNSPEC_INDNTPOFF;
6319 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6320 off = gen_rtx_CONST (Pmode, off);
6322 off = gen_rtx_PLUS (Pmode, pic, off);
6323 off = gen_rtx_MEM (Pmode, off);
6324 RTX_UNCHANGING_P (off) = 1;
6325 set_mem_alias_set (off, ix86_GOT_alias_set ());
6327 if (TARGET_64BIT || TARGET_GNU_TLS)
6329 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6330 off = force_reg (Pmode, off);
6331 return gen_rtx_PLUS (Pmode, base, off);
6335 base = get_thread_pointer (true);
6336 dest = gen_reg_rtx (Pmode);
6337 emit_insn (gen_subsi3 (dest, base, off));
6341 case TLS_MODEL_LOCAL_EXEC:
6342 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6343 (TARGET_64BIT || TARGET_GNU_TLS)
6344 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6345 off = gen_rtx_CONST (Pmode, off);
6347 if (TARGET_64BIT || TARGET_GNU_TLS)
6349 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6350 return gen_rtx_PLUS (Pmode, base, off);
6354 base = get_thread_pointer (true);
6355 dest = gen_reg_rtx (Pmode);
6356 emit_insn (gen_subsi3 (dest, base, off));
6367 /* Try machine-dependent ways of modifying an illegitimate address
6368 to be legitimate. If we find one, return the new, valid address.
6369 This macro is used in only one place: `memory_address' in explow.c.
6371 OLDX is the address as it was before break_out_memory_refs was called.
6372 In some cases it is useful to look at this to decide what needs to be done.
6374 MODE and WIN are passed so that this macro can use
6375 GO_IF_LEGITIMATE_ADDRESS.
6377 It is always safe for this macro to do nothing. It exists to recognize
6378 opportunities to optimize the output.
6380 For the 80386, we handle X+REG by loading X into a register R and
6381 using R+REG. R will go in a general reg and indexing will be used.
6382 However, if REG is a broken-out memory address or multiplication,
6383 nothing needs to be done because REG can certainly go in a general reg.
6385 When -fpic is used, special handling is needed for symbolic references.
6386 See comments by legitimize_pic_address in i386.c for details. */
6389 legitimize_address (register rtx x, register rtx oldx ATTRIBUTE_UNUSED,
6390 enum machine_mode mode)
6395 if (TARGET_DEBUG_ADDR)
6397 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6398 GET_MODE_NAME (mode));
6402 log = tls_symbolic_operand (x, mode);
6404 return legitimize_tls_address (x, log, false);
6406 if (flag_pic && SYMBOLIC_CONST (x))
6407 return legitimize_pic_address (x, 0);
6409 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6410 if (GET_CODE (x) == ASHIFT
6411 && GET_CODE (XEXP (x, 1)) == CONST_INT
6412 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6415 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6416 GEN_INT (1 << log));
6419 if (GET_CODE (x) == PLUS)
6421 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6423 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6424 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6425 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6428 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6429 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6430 GEN_INT (1 << log));
6433 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6434 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6435 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6438 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6439 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6440 GEN_INT (1 << log));
6443 /* Put multiply first if it isn't already. */
6444 if (GET_CODE (XEXP (x, 1)) == MULT)
6446 rtx tmp = XEXP (x, 0);
6447 XEXP (x, 0) = XEXP (x, 1);
6452 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6453 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6454 created by virtual register instantiation, register elimination, and
6455 similar optimizations. */
6456 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6459 x = gen_rtx_PLUS (Pmode,
6460 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6461 XEXP (XEXP (x, 1), 0)),
6462 XEXP (XEXP (x, 1), 1));
6466 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6467 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6468 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6469 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6470 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6471 && CONSTANT_P (XEXP (x, 1)))
6474 rtx other = NULL_RTX;
6476 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6478 constant = XEXP (x, 1);
6479 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6481 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6483 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6484 other = XEXP (x, 1);
6492 x = gen_rtx_PLUS (Pmode,
6493 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6494 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6495 plus_constant (other, INTVAL (constant)));
6499 if (changed && legitimate_address_p (mode, x, FALSE))
6502 if (GET_CODE (XEXP (x, 0)) == MULT)
6505 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6508 if (GET_CODE (XEXP (x, 1)) == MULT)
6511 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6515 && GET_CODE (XEXP (x, 1)) == REG
6516 && GET_CODE (XEXP (x, 0)) == REG)
6519 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6522 x = legitimize_pic_address (x, 0);
6525 if (changed && legitimate_address_p (mode, x, FALSE))
6528 if (GET_CODE (XEXP (x, 0)) == REG)
6530 register rtx temp = gen_reg_rtx (Pmode);
6531 register rtx val = force_operand (XEXP (x, 1), temp);
6533 emit_move_insn (temp, val);
6539 else if (GET_CODE (XEXP (x, 1)) == REG)
6541 register rtx temp = gen_reg_rtx (Pmode);
6542 register rtx val = force_operand (XEXP (x, 0), temp);
6544 emit_move_insn (temp, val);
6554 /* Print an integer constant expression in assembler syntax. Addition
6555 and subtraction are the only arithmetic that may appear in these
6556 expressions. FILE is the stdio stream to write to, X is the rtx, and
6557 CODE is the operand print code from the output string. */
6560 output_pic_addr_const (FILE *file, rtx x, int code)
6564 switch (GET_CODE (x))
6574 assemble_name (file, XSTR (x, 0));
6575 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
6576 fputs ("@PLT", file);
6583 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6584 assemble_name (asm_out_file, buf);
6588 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6592 /* This used to output parentheses around the expression,
6593 but that does not work on the 386 (either ATT or BSD assembler). */
6594 output_pic_addr_const (file, XEXP (x, 0), code);
6598 if (GET_MODE (x) == VOIDmode)
6600 /* We can use %d if the number is <32 bits and positive. */
6601 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6602 fprintf (file, "0x%lx%08lx",
6603 (unsigned long) CONST_DOUBLE_HIGH (x),
6604 (unsigned long) CONST_DOUBLE_LOW (x));
6606 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6609 /* We can't handle floating point constants;
6610 PRINT_OPERAND must handle them. */
6611 output_operand_lossage ("floating constant misused");
6615 /* Some assemblers need integer constants to appear first. */
6616 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6618 output_pic_addr_const (file, XEXP (x, 0), code);
6620 output_pic_addr_const (file, XEXP (x, 1), code);
6622 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6624 output_pic_addr_const (file, XEXP (x, 1), code);
6626 output_pic_addr_const (file, XEXP (x, 0), code);
6634 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6635 output_pic_addr_const (file, XEXP (x, 0), code);
6637 output_pic_addr_const (file, XEXP (x, 1), code);
6639 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6643 if (XVECLEN (x, 0) != 1)
6645 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6646 switch (XINT (x, 1))
6649 fputs ("@GOT", file);
6652 fputs ("@GOTOFF", file);
6654 case UNSPEC_GOTPCREL:
6655 fputs ("@GOTPCREL(%rip)", file);
6657 case UNSPEC_GOTTPOFF:
6658 /* FIXME: This might be @TPOFF in Sun ld too. */
6659 fputs ("@GOTTPOFF", file);
6662 fputs ("@TPOFF", file);
6666 fputs ("@TPOFF", file);
6668 fputs ("@NTPOFF", file);
6671 fputs ("@DTPOFF", file);
6673 case UNSPEC_GOTNTPOFF:
6675 fputs ("@GOTTPOFF(%rip)", file);
6677 fputs ("@GOTNTPOFF", file);
6679 case UNSPEC_INDNTPOFF:
6680 fputs ("@INDNTPOFF", file);
6683 output_operand_lossage ("invalid UNSPEC as operand");
6689 output_operand_lossage ("invalid expression as operand");
6693 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6694 We need to handle our special PIC relocations. */
6697 i386_dwarf_output_addr_const (FILE *file, rtx x)
6700 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6704 fprintf (file, "%s", ASM_LONG);
6707 output_pic_addr_const (file, x, '\0');
6709 output_addr_const (file, x);
6713 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6714 We need to emit DTP-relative relocations. */
6717 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
6719 fputs (ASM_LONG, file);
6720 output_addr_const (file, x);
6721 fputs ("@DTPOFF", file);
6727 fputs (", 0", file);
6734 /* In the name of slightly smaller debug output, and to cater to
6735 general assembler losage, recognize PIC+GOTOFF and turn it back
6736 into a direct symbol reference. */
6739 ix86_delegitimize_address (rtx orig_x)
6743 if (GET_CODE (x) == MEM)
6748 if (GET_CODE (x) != CONST
6749 || GET_CODE (XEXP (x, 0)) != UNSPEC
6750 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6751 || GET_CODE (orig_x) != MEM)
6753 return XVECEXP (XEXP (x, 0), 0, 0);
6756 if (GET_CODE (x) != PLUS
6757 || GET_CODE (XEXP (x, 1)) != CONST)
6760 if (GET_CODE (XEXP (x, 0)) == REG
6761 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6762 /* %ebx + GOT/GOTOFF */
6764 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6766 /* %ebx + %reg * scale + GOT/GOTOFF */
6768 if (GET_CODE (XEXP (y, 0)) == REG
6769 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6771 else if (GET_CODE (XEXP (y, 1)) == REG
6772 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6776 if (GET_CODE (y) != REG
6777 && GET_CODE (y) != MULT
6778 && GET_CODE (y) != ASHIFT)
6784 x = XEXP (XEXP (x, 1), 0);
6785 if (GET_CODE (x) == UNSPEC
6786 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6787 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6790 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6791 return XVECEXP (x, 0, 0);
6794 if (GET_CODE (x) == PLUS
6795 && GET_CODE (XEXP (x, 0)) == UNSPEC
6796 && GET_CODE (XEXP (x, 1)) == CONST_INT
6797 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6798 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6799 && GET_CODE (orig_x) != MEM)))
6801 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6803 return gen_rtx_PLUS (Pmode, y, x);
6811 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
6816 if (mode == CCFPmode || mode == CCFPUmode)
6818 enum rtx_code second_code, bypass_code;
6819 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6820 if (bypass_code != NIL || second_code != NIL)
6822 code = ix86_fp_compare_code_to_integer (code);
6826 code = reverse_condition (code);
6837 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6842 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
6843 Those same assemblers have the same but opposite losage on cmov. */
6846 suffix = fp ? "nbe" : "a";
6849 if (mode == CCNOmode || mode == CCGOCmode)
6851 else if (mode == CCmode || mode == CCGCmode)
6862 if (mode == CCNOmode || mode == CCGOCmode)
6864 else if (mode == CCmode || mode == CCGCmode)
6873 suffix = fp ? "nb" : "ae";
6876 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6886 suffix = fp ? "u" : "p";
6889 suffix = fp ? "nu" : "np";
6894 fputs (suffix, file);
6898 print_reg (rtx x, int code, FILE *file)
6900 if (REGNO (x) == ARG_POINTER_REGNUM
6901 || REGNO (x) == FRAME_POINTER_REGNUM
6902 || REGNO (x) == FLAGS_REG
6903 || REGNO (x) == FPSR_REG)
6906 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6909 if (code == 'w' || MMX_REG_P (x))
6911 else if (code == 'b')
6913 else if (code == 'k')
6915 else if (code == 'q')
6917 else if (code == 'y')
6919 else if (code == 'h')
6922 code = GET_MODE_SIZE (GET_MODE (x));
6924 /* Irritatingly, AMD extended registers use different naming convention
6925 from the normal registers. */
6926 if (REX_INT_REG_P (x))
6933 error ("extended registers have no high halves");
6936 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6939 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6942 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6945 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6948 error ("unsupported operand size for extended register");
6956 if (STACK_TOP_P (x))
6958 fputs ("st(0)", file);
6965 if (! ANY_FP_REG_P (x))
6966 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6970 fputs (hi_reg_name[REGNO (x)], file);
6973 fputs (qi_reg_name[REGNO (x)], file);
6976 fputs (qi_high_reg_name[REGNO (x)], file);
6983 /* Locate some local-dynamic symbol still in use by this function
6984 so that we can print its name in some tls_local_dynamic_base
6988 get_some_local_dynamic_name (void)
6992 if (cfun->machine->some_ld_name)
6993 return cfun->machine->some_ld_name;
6995 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6997 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6998 return cfun->machine->some_ld_name;
7004 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7008 if (GET_CODE (x) == SYMBOL_REF
7009 && local_dynamic_symbolic_operand (x, Pmode))
7011 cfun->machine->some_ld_name = XSTR (x, 0);
7019 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7020 C -- print opcode suffix for set/cmov insn.
7021 c -- like C, but print reversed condition
7022 F,f -- likewise, but for floating-point.
7023 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7025 R -- print the prefix for register names.
7026 z -- print the opcode suffix for the size of the current operand.
7027 * -- print a star (in certain assembler syntax)
7028 A -- print an absolute memory reference.
7029 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7030 s -- print a shift double count, followed by the assemblers argument
7032 b -- print the QImode name of the register for the indicated operand.
7033 %b0 would print %al if operands[0] is reg 0.
7034 w -- likewise, print the HImode name of the register.
7035 k -- likewise, print the SImode name of the register.
7036 q -- likewise, print the DImode name of the register.
7037 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7038 y -- print "st(0)" instead of "st" as a register.
7039 D -- print condition for SSE cmp instruction.
7040 P -- if PIC, print an @PLT suffix.
7041 X -- don't print any sort of PIC '@' suffix for a symbol.
7042 & -- print some in-use local-dynamic symbol name.
7046 print_operand (FILE *file, rtx x, int code)
7053 if (ASSEMBLER_DIALECT == ASM_ATT)
7058 assemble_name (file, get_some_local_dynamic_name ());
7062 if (ASSEMBLER_DIALECT == ASM_ATT)
7064 else if (ASSEMBLER_DIALECT == ASM_INTEL)
7066 /* Intel syntax. For absolute addresses, registers should not
7067 be surrounded by braces. */
7068 if (GET_CODE (x) != REG)
7071 PRINT_OPERAND (file, x, 0);
7079 PRINT_OPERAND (file, x, 0);
7084 if (ASSEMBLER_DIALECT == ASM_ATT)
7089 if (ASSEMBLER_DIALECT == ASM_ATT)
7094 if (ASSEMBLER_DIALECT == ASM_ATT)
7099 if (ASSEMBLER_DIALECT == ASM_ATT)
7104 if (ASSEMBLER_DIALECT == ASM_ATT)
7109 if (ASSEMBLER_DIALECT == ASM_ATT)
7114 /* 387 opcodes don't get size suffixes if the operands are
7116 if (STACK_REG_P (x))
7119 /* Likewise if using Intel opcodes. */
7120 if (ASSEMBLER_DIALECT == ASM_INTEL)
7123 /* This is the size of op from size of operand. */
7124 switch (GET_MODE_SIZE (GET_MODE (x)))
7127 #ifdef HAVE_GAS_FILDS_FISTS
7133 if (GET_MODE (x) == SFmode)
7148 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7150 #ifdef GAS_MNEMONICS
7176 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7178 PRINT_OPERAND (file, x, 0);
7184 /* Little bit of braindamage here. The SSE compare instructions
7185 does use completely different names for the comparisons that the
7186 fp conditional moves. */
7187 switch (GET_CODE (x))
7202 fputs ("unord", file);
7206 fputs ("neq", file);
7210 fputs ("nlt", file);
7214 fputs ("nle", file);
7217 fputs ("ord", file);
7225 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7226 if (ASSEMBLER_DIALECT == ASM_ATT)
7228 switch (GET_MODE (x))
7230 case HImode: putc ('w', file); break;
7232 case SFmode: putc ('l', file); break;
7234 case DFmode: putc ('q', file); break;
7242 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7245 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7246 if (ASSEMBLER_DIALECT == ASM_ATT)
7249 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7252 /* Like above, but reverse condition */
7254 /* Check to see if argument to %c is really a constant
7255 and not a condition code which needs to be reversed. */
7256 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
7258 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7261 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7264 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7265 if (ASSEMBLER_DIALECT == ASM_ATT)
7268 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7274 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7277 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7280 int pred_val = INTVAL (XEXP (x, 0));
7282 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7283 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7285 int taken = pred_val > REG_BR_PROB_BASE / 2;
7286 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7288 /* Emit hints only in the case default branch prediction
7289 heuristics would fail. */
7290 if (taken != cputaken)
7292 /* We use 3e (DS) prefix for taken branches and
7293 2e (CS) prefix for not taken branches. */
7295 fputs ("ds ; ", file);
7297 fputs ("cs ; ", file);
7304 output_operand_lossage ("invalid operand code `%c'", code);
7308 if (GET_CODE (x) == REG)
7310 PRINT_REG (x, code, file);
7313 else if (GET_CODE (x) == MEM)
7315 /* No `byte ptr' prefix for call instructions. */
7316 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7319 switch (GET_MODE_SIZE (GET_MODE (x)))
7321 case 1: size = "BYTE"; break;
7322 case 2: size = "WORD"; break;
7323 case 4: size = "DWORD"; break;
7324 case 8: size = "QWORD"; break;
7325 case 12: size = "XWORD"; break;
7326 case 16: size = "XMMWORD"; break;
7331 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7334 else if (code == 'w')
7336 else if (code == 'k')
7340 fputs (" PTR ", file);
7344 /* Avoid (%rip) for call operands. */
7345 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7346 && GET_CODE (x) != CONST_INT)
7347 output_addr_const (file, x);
7348 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7349 output_operand_lossage ("invalid constraints for operand");
7354 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7359 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7360 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7362 if (ASSEMBLER_DIALECT == ASM_ATT)
7364 fprintf (file, "0x%lx", l);
7367 /* These float cases don't actually occur as immediate operands. */
7368 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7372 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7373 fprintf (file, "%s", dstr);
7376 else if (GET_CODE (x) == CONST_DOUBLE
7377 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
7381 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7382 fprintf (file, "%s", dstr);
7389 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7391 if (ASSEMBLER_DIALECT == ASM_ATT)
7394 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7395 || GET_CODE (x) == LABEL_REF)
7397 if (ASSEMBLER_DIALECT == ASM_ATT)
7400 fputs ("OFFSET FLAT:", file);
7403 if (GET_CODE (x) == CONST_INT)
7404 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7406 output_pic_addr_const (file, x, code);
7408 output_addr_const (file, x);
7412 /* Print a memory operand whose address is ADDR. */
7415 print_operand_address (FILE *file, register rtx addr)
7417 struct ix86_address parts;
7418 rtx base, index, disp;
7421 if (! ix86_decompose_address (addr, &parts))
7425 index = parts.index;
7427 scale = parts.scale;
7435 if (USER_LABEL_PREFIX[0] == 0)
7437 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
7443 if (!base && !index)
7445 /* Displacement only requires special attention. */
7447 if (GET_CODE (disp) == CONST_INT)
7449 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
7451 if (USER_LABEL_PREFIX[0] == 0)
7453 fputs ("ds:", file);
7455 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
7458 output_pic_addr_const (file, disp, 0);
7460 output_addr_const (file, disp);
7462 /* Use one byte shorter RIP relative addressing for 64bit mode. */
7464 && ((GET_CODE (disp) == SYMBOL_REF
7465 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
7466 || GET_CODE (disp) == LABEL_REF
7467 || (GET_CODE (disp) == CONST
7468 && GET_CODE (XEXP (disp, 0)) == PLUS
7469 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
7470 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
7471 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
7472 fputs ("(%rip)", file);
7476 if (ASSEMBLER_DIALECT == ASM_ATT)
7481 output_pic_addr_const (file, disp, 0);
7482 else if (GET_CODE (disp) == LABEL_REF)
7483 output_asm_label (disp);
7485 output_addr_const (file, disp);
7490 PRINT_REG (base, 0, file);
7494 PRINT_REG (index, 0, file);
7496 fprintf (file, ",%d", scale);
7502 rtx offset = NULL_RTX;
7506 /* Pull out the offset of a symbol; print any symbol itself. */
7507 if (GET_CODE (disp) == CONST
7508 && GET_CODE (XEXP (disp, 0)) == PLUS
7509 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7511 offset = XEXP (XEXP (disp, 0), 1);
7512 disp = gen_rtx_CONST (VOIDmode,
7513 XEXP (XEXP (disp, 0), 0));
7517 output_pic_addr_const (file, disp, 0);
7518 else if (GET_CODE (disp) == LABEL_REF)
7519 output_asm_label (disp);
7520 else if (GET_CODE (disp) == CONST_INT)
7523 output_addr_const (file, disp);
7529 PRINT_REG (base, 0, file);
7532 if (INTVAL (offset) >= 0)
7534 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7538 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7545 PRINT_REG (index, 0, file);
7547 fprintf (file, "*%d", scale);
7555 output_addr_const_extra (FILE *file, rtx x)
7559 if (GET_CODE (x) != UNSPEC)
7562 op = XVECEXP (x, 0, 0);
7563 switch (XINT (x, 1))
7565 case UNSPEC_GOTTPOFF:
7566 output_addr_const (file, op);
7567 /* FIXME: This might be @TPOFF in Sun ld. */
7568 fputs ("@GOTTPOFF", file);
7571 output_addr_const (file, op);
7572 fputs ("@TPOFF", file);
7575 output_addr_const (file, op);
7577 fputs ("@TPOFF", file);
7579 fputs ("@NTPOFF", file);
7582 output_addr_const (file, op);
7583 fputs ("@DTPOFF", file);
7585 case UNSPEC_GOTNTPOFF:
7586 output_addr_const (file, op);
7588 fputs ("@GOTTPOFF(%rip)", file);
7590 fputs ("@GOTNTPOFF", file);
7592 case UNSPEC_INDNTPOFF:
7593 output_addr_const (file, op);
7594 fputs ("@INDNTPOFF", file);
7604 /* Split one or more DImode RTL references into pairs of SImode
7605 references. The RTL can be REG, offsettable MEM, integer constant, or
7606 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7607 split and "num" is its length. lo_half and hi_half are output arrays
7608 that parallel "operands". */
7611 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7615 rtx op = operands[num];
7617 /* simplify_subreg refuse to split volatile memory addresses,
7618 but we still have to handle it. */
7619 if (GET_CODE (op) == MEM)
7621 lo_half[num] = adjust_address (op, SImode, 0);
7622 hi_half[num] = adjust_address (op, SImode, 4);
7626 lo_half[num] = simplify_gen_subreg (SImode, op,
7627 GET_MODE (op) == VOIDmode
7628 ? DImode : GET_MODE (op), 0);
7629 hi_half[num] = simplify_gen_subreg (SImode, op,
7630 GET_MODE (op) == VOIDmode
7631 ? DImode : GET_MODE (op), 4);
7635 /* Split one or more TImode RTL references into pairs of SImode
7636 references. The RTL can be REG, offsettable MEM, integer constant, or
7637 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7638 split and "num" is its length. lo_half and hi_half are output arrays
7639 that parallel "operands". */
7642 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7646 rtx op = operands[num];
7648 /* simplify_subreg refuse to split volatile memory addresses, but we
7649 still have to handle it. */
7650 if (GET_CODE (op) == MEM)
7652 lo_half[num] = adjust_address (op, DImode, 0);
7653 hi_half[num] = adjust_address (op, DImode, 8);
7657 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7658 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7663 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7664 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7665 is the expression of the binary operation. The output may either be
7666 emitted here, or returned to the caller, like all output_* functions.
7668 There is no guarantee that the operands are the same mode, as they
7669 might be within FLOAT or FLOAT_EXTEND expressions. */
7671 #ifndef SYSV386_COMPAT
7672 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7673 wants to fix the assemblers because that causes incompatibility
7674 with gcc. No-one wants to fix gcc because that causes
7675 incompatibility with assemblers... You can use the option of
7676 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7677 #define SYSV386_COMPAT 1
7681 output_387_binary_op (rtx insn, rtx *operands)
7683 static char buf[30];
7686 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7688 #ifdef ENABLE_CHECKING
7689 /* Even if we do not want to check the inputs, this documents input
7690 constraints. Which helps in understanding the following code. */
7691 if (STACK_REG_P (operands[0])
7692 && ((REG_P (operands[1])
7693 && REGNO (operands[0]) == REGNO (operands[1])
7694 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7695 || (REG_P (operands[2])
7696 && REGNO (operands[0]) == REGNO (operands[2])
7697 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7698 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7704 switch (GET_CODE (operands[3]))
7707 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7708 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7716 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7717 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7725 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7726 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7734 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7735 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7749 if (GET_MODE (operands[0]) == SFmode)
7750 strcat (buf, "ss\t{%2, %0|%0, %2}");
7752 strcat (buf, "sd\t{%2, %0|%0, %2}");
7757 switch (GET_CODE (operands[3]))
7761 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7763 rtx temp = operands[2];
7764 operands[2] = operands[1];
7768 /* know operands[0] == operands[1]. */
7770 if (GET_CODE (operands[2]) == MEM)
7776 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7778 if (STACK_TOP_P (operands[0]))
7779 /* How is it that we are storing to a dead operand[2]?
7780 Well, presumably operands[1] is dead too. We can't
7781 store the result to st(0) as st(0) gets popped on this
7782 instruction. Instead store to operands[2] (which I
7783 think has to be st(1)). st(1) will be popped later.
7784 gcc <= 2.8.1 didn't have this check and generated
7785 assembly code that the Unixware assembler rejected. */
7786 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7788 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7792 if (STACK_TOP_P (operands[0]))
7793 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7795 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7800 if (GET_CODE (operands[1]) == MEM)
7806 if (GET_CODE (operands[2]) == MEM)
7812 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7815 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7816 derived assemblers, confusingly reverse the direction of
7817 the operation for fsub{r} and fdiv{r} when the
7818 destination register is not st(0). The Intel assembler
7819 doesn't have this brain damage. Read !SYSV386_COMPAT to
7820 figure out what the hardware really does. */
7821 if (STACK_TOP_P (operands[0]))
7822 p = "{p\t%0, %2|rp\t%2, %0}";
7824 p = "{rp\t%2, %0|p\t%0, %2}";
7826 if (STACK_TOP_P (operands[0]))
7827 /* As above for fmul/fadd, we can't store to st(0). */
7828 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7830 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7835 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7838 if (STACK_TOP_P (operands[0]))
7839 p = "{rp\t%0, %1|p\t%1, %0}";
7841 p = "{p\t%1, %0|rp\t%0, %1}";
7843 if (STACK_TOP_P (operands[0]))
7844 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7846 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7851 if (STACK_TOP_P (operands[0]))
7853 if (STACK_TOP_P (operands[1]))
7854 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7856 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7859 else if (STACK_TOP_P (operands[1]))
7862 p = "{\t%1, %0|r\t%0, %1}";
7864 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7870 p = "{r\t%2, %0|\t%0, %2}";
7872 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7885 /* Output code to initialize control word copies used by
7886 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
7887 is set to control word rounding downwards. */
7889 emit_i387_cw_initialization (rtx normal, rtx round_down)
7891 rtx reg = gen_reg_rtx (HImode);
7893 emit_insn (gen_x86_fnstcw_1 (normal));
7894 emit_move_insn (reg, normal);
7895 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7897 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7899 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7900 emit_move_insn (round_down, reg);
7903 /* Output code for INSN to convert a float to a signed int. OPERANDS
7904 are the insn operands. The output may be [HSD]Imode and the input
7905 operand may be [SDX]Fmode. */
7908 output_fix_trunc (rtx insn, rtx *operands)
7910 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7911 int dimode_p = GET_MODE (operands[0]) == DImode;
7913 /* Jump through a hoop or two for DImode, since the hardware has no
7914 non-popping instruction. We used to do this a different way, but
7915 that was somewhat fragile and broke with post-reload splitters. */
7916 if (dimode_p && !stack_top_dies)
7917 output_asm_insn ("fld\t%y1", operands);
7919 if (!STACK_TOP_P (operands[1]))
7922 if (GET_CODE (operands[0]) != MEM)
7925 output_asm_insn ("fldcw\t%3", operands);
7926 if (stack_top_dies || dimode_p)
7927 output_asm_insn ("fistp%z0\t%0", operands);
7929 output_asm_insn ("fist%z0\t%0", operands);
7930 output_asm_insn ("fldcw\t%2", operands);
7935 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7936 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7937 when fucom should be used. */
7940 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
7943 rtx cmp_op0 = operands[0];
7944 rtx cmp_op1 = operands[1];
7945 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
7950 cmp_op1 = operands[2];
7954 if (GET_MODE (operands[0]) == SFmode)
7956 return "ucomiss\t{%1, %0|%0, %1}";
7958 return "comiss\t{%1, %0|%0, %1}";
7961 return "ucomisd\t{%1, %0|%0, %1}";
7963 return "comisd\t{%1, %0|%0, %1}";
7966 if (! STACK_TOP_P (cmp_op0))
7969 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7971 if (STACK_REG_P (cmp_op1)
7973 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7974 && REGNO (cmp_op1) != FIRST_STACK_REG)
7976 /* If both the top of the 387 stack dies, and the other operand
7977 is also a stack register that dies, then this must be a
7978 `fcompp' float compare */
7982 /* There is no double popping fcomi variant. Fortunately,
7983 eflags is immune from the fstp's cc clobbering. */
7985 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7987 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7995 return "fucompp\n\tfnstsw\t%0";
7997 return "fcompp\n\tfnstsw\t%0";
8010 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8012 static const char * const alt[24] =
8024 "fcomi\t{%y1, %0|%0, %y1}",
8025 "fcomip\t{%y1, %0|%0, %y1}",
8026 "fucomi\t{%y1, %0|%0, %y1}",
8027 "fucomip\t{%y1, %0|%0, %y1}",
8034 "fcom%z2\t%y2\n\tfnstsw\t%0",
8035 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8036 "fucom%z2\t%y2\n\tfnstsw\t%0",
8037 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8039 "ficom%z2\t%y2\n\tfnstsw\t%0",
8040 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8048 mask = eflags_p << 3;
8049 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
8050 mask |= unordered_p << 1;
8051 mask |= stack_top_dies;
8064 ix86_output_addr_vec_elt (FILE *file, int value)
8066 const char *directive = ASM_LONG;
8071 directive = ASM_QUAD;
8077 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8081 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8084 fprintf (file, "%s%s%d-%s%d\n",
8085 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8086 else if (HAVE_AS_GOTOFF_IN_DATA)
8087 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8089 else if (TARGET_MACHO)
8091 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8092 machopic_output_function_base_name (file);
8093 fprintf(file, "\n");
8097 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8098 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8101 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8105 ix86_expand_clear (rtx dest)
8109 /* We play register width games, which are only valid after reload. */
8110 if (!reload_completed)
8113 /* Avoid HImode and its attendant prefix byte. */
8114 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8115 dest = gen_rtx_REG (SImode, REGNO (dest));
8117 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8119 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8120 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8122 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8123 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8129 /* X is an unchanging MEM. If it is a constant pool reference, return
8130 the constant pool rtx, else NULL. */
8133 maybe_get_pool_constant (rtx x)
8135 x = ix86_delegitimize_address (XEXP (x, 0));
8137 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8138 return get_pool_constant (x);
8144 ix86_expand_move (enum machine_mode mode, rtx operands[])
8146 int strict = (reload_in_progress || reload_completed);
8148 enum tls_model model;
8153 model = tls_symbolic_operand (op1, Pmode);
8156 op1 = legitimize_tls_address (op1, model, true);
8157 op1 = force_operand (op1, op0);
8162 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8167 rtx temp = ((reload_in_progress
8168 || ((op0 && GET_CODE (op0) == REG)
8170 ? op0 : gen_reg_rtx (Pmode));
8171 op1 = machopic_indirect_data_reference (op1, temp);
8172 op1 = machopic_legitimize_pic_address (op1, mode,
8173 temp == op1 ? 0 : temp);
8175 else if (MACHOPIC_INDIRECT)
8176 op1 = machopic_indirect_data_reference (op1, 0);
8180 if (GET_CODE (op0) == MEM)
8181 op1 = force_reg (Pmode, op1);
8185 if (GET_CODE (temp) != REG)
8186 temp = gen_reg_rtx (Pmode);
8187 temp = legitimize_pic_address (op1, temp);
8192 #endif /* TARGET_MACHO */
8196 if (GET_CODE (op0) == MEM
8197 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8198 || !push_operand (op0, mode))
8199 && GET_CODE (op1) == MEM)
8200 op1 = force_reg (mode, op1);
8202 if (push_operand (op0, mode)
8203 && ! general_no_elim_operand (op1, mode))
8204 op1 = copy_to_mode_reg (mode, op1);
8206 /* Force large constants in 64bit compilation into register
8207 to get them CSEed. */
8208 if (TARGET_64BIT && mode == DImode
8209 && immediate_operand (op1, mode)
8210 && !x86_64_zero_extended_value (op1)
8211 && !register_operand (op0, mode)
8212 && optimize && !reload_completed && !reload_in_progress)
8213 op1 = copy_to_mode_reg (mode, op1);
8215 if (FLOAT_MODE_P (mode))
8217 /* If we are loading a floating point constant to a register,
8218 force the value to memory now, since we'll get better code
8219 out the back end. */
8223 else if (GET_CODE (op1) == CONST_DOUBLE)
8225 op1 = validize_mem (force_const_mem (mode, op1));
8226 if (!register_operand (op0, mode))
8228 rtx temp = gen_reg_rtx (mode);
8229 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8230 emit_move_insn (op0, temp);
8237 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
8241 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
8243 /* Force constants other than zero into memory. We do not know how
8244 the instructions used to build constants modify the upper 64 bits
8245 of the register, once we have that information we may be able
8246 to handle some of them more efficiently. */
8247 if ((reload_in_progress | reload_completed) == 0
8248 && register_operand (operands[0], mode)
8249 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
8250 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
8252 /* Make operand1 a register if it isn't already. */
8254 && !register_operand (operands[0], mode)
8255 && !register_operand (operands[1], mode))
8257 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8258 emit_move_insn (operands[0], temp);
8262 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8265 /* Attempt to expand a binary operator. Make the expansion closer to the
8266 actual machine, then just general_operand, which will allow 3 separate
8267 memory references (one output, two input) in a single insn. */
8270 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
8273 int matching_memory;
8274 rtx src1, src2, dst, op, clob;
8280 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8281 if (GET_RTX_CLASS (code) == 'c'
8282 && (rtx_equal_p (dst, src2)
8283 || immediate_operand (src1, mode)))
8290 /* If the destination is memory, and we do not have matching source
8291 operands, do things in registers. */
8292 matching_memory = 0;
8293 if (GET_CODE (dst) == MEM)
8295 if (rtx_equal_p (dst, src1))
8296 matching_memory = 1;
8297 else if (GET_RTX_CLASS (code) == 'c'
8298 && rtx_equal_p (dst, src2))
8299 matching_memory = 2;
8301 dst = gen_reg_rtx (mode);
8304 /* Both source operands cannot be in memory. */
8305 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8307 if (matching_memory != 2)
8308 src2 = force_reg (mode, src2);
8310 src1 = force_reg (mode, src1);
8313 /* If the operation is not commutable, source 1 cannot be a constant
8314 or non-matching memory. */
8315 if ((CONSTANT_P (src1)
8316 || (!matching_memory && GET_CODE (src1) == MEM))
8317 && GET_RTX_CLASS (code) != 'c')
8318 src1 = force_reg (mode, src1);
8320 /* If optimizing, copy to regs to improve CSE */
8321 if (optimize && ! no_new_pseudos)
8323 if (GET_CODE (dst) == MEM)
8324 dst = gen_reg_rtx (mode);
8325 if (GET_CODE (src1) == MEM)
8326 src1 = force_reg (mode, src1);
8327 if (GET_CODE (src2) == MEM)
8328 src2 = force_reg (mode, src2);
8331 /* Emit the instruction. */
8333 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8334 if (reload_in_progress)
8336 /* Reload doesn't know about the flags register, and doesn't know that
8337 it doesn't want to clobber it. We can only do this with PLUS. */
8344 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8345 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8348 /* Fix up the destination if needed. */
8349 if (dst != operands[0])
8350 emit_move_insn (operands[0], dst);
8353 /* Return TRUE or FALSE depending on whether the binary operator meets the
8354 appropriate constraints. */
8357 ix86_binary_operator_ok (enum rtx_code code,
8358 enum machine_mode mode ATTRIBUTE_UNUSED,
8361 /* Both source operands cannot be in memory. */
8362 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8364 /* If the operation is not commutable, source 1 cannot be a constant. */
8365 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
8367 /* If the destination is memory, we must have a matching source operand. */
8368 if (GET_CODE (operands[0]) == MEM
8369 && ! (rtx_equal_p (operands[0], operands[1])
8370 || (GET_RTX_CLASS (code) == 'c'
8371 && rtx_equal_p (operands[0], operands[2]))))
8373 /* If the operation is not commutable and the source 1 is memory, we must
8374 have a matching destination. */
8375 if (GET_CODE (operands[1]) == MEM
8376 && GET_RTX_CLASS (code) != 'c'
8377 && ! rtx_equal_p (operands[0], operands[1]))
8382 /* Attempt to expand a unary operator. Make the expansion closer to the
8383 actual machine, then just general_operand, which will allow 2 separate
8384 memory references (one output, one input) in a single insn. */
8387 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
8390 int matching_memory;
8391 rtx src, dst, op, clob;
8396 /* If the destination is memory, and we do not have matching source
8397 operands, do things in registers. */
8398 matching_memory = 0;
8399 if (GET_CODE (dst) == MEM)
8401 if (rtx_equal_p (dst, src))
8402 matching_memory = 1;
8404 dst = gen_reg_rtx (mode);
8407 /* When source operand is memory, destination must match. */
8408 if (!matching_memory && GET_CODE (src) == MEM)
8409 src = force_reg (mode, src);
8411 /* If optimizing, copy to regs to improve CSE */
8412 if (optimize && ! no_new_pseudos)
8414 if (GET_CODE (dst) == MEM)
8415 dst = gen_reg_rtx (mode);
8416 if (GET_CODE (src) == MEM)
8417 src = force_reg (mode, src);
8420 /* Emit the instruction. */
8422 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8423 if (reload_in_progress || code == NOT)
8425 /* Reload doesn't know about the flags register, and doesn't know that
8426 it doesn't want to clobber it. */
8433 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8434 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8437 /* Fix up the destination if needed. */
8438 if (dst != operands[0])
8439 emit_move_insn (operands[0], dst);
8442 /* Return TRUE or FALSE depending on whether the unary operator meets the
8443 appropriate constraints. */
8446 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8447 enum machine_mode mode ATTRIBUTE_UNUSED,
8448 rtx operands[2] ATTRIBUTE_UNUSED)
8450 /* If one of operands is memory, source and destination must match. */
8451 if ((GET_CODE (operands[0]) == MEM
8452 || GET_CODE (operands[1]) == MEM)
8453 && ! rtx_equal_p (operands[0], operands[1]))
8458 /* Return TRUE or FALSE depending on whether the first SET in INSN
8459 has source and destination with matching CC modes, and that the
8460 CC mode is at least as constrained as REQ_MODE. */
8463 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
8466 enum machine_mode set_mode;
8468 set = PATTERN (insn);
8469 if (GET_CODE (set) == PARALLEL)
8470 set = XVECEXP (set, 0, 0);
8471 if (GET_CODE (set) != SET)
8473 if (GET_CODE (SET_SRC (set)) != COMPARE)
8476 set_mode = GET_MODE (SET_DEST (set));
8480 if (req_mode != CCNOmode
8481 && (req_mode != CCmode
8482 || XEXP (SET_SRC (set), 1) != const0_rtx))
8486 if (req_mode == CCGCmode)
8490 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8494 if (req_mode == CCZmode)
8504 return (GET_MODE (SET_SRC (set)) == set_mode);
8507 /* Generate insn patterns to do an integer compare of OPERANDS. */
8510 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
8512 enum machine_mode cmpmode;
8515 cmpmode = SELECT_CC_MODE (code, op0, op1);
8516 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8518 /* This is very simple, but making the interface the same as in the
8519 FP case makes the rest of the code easier. */
8520 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8521 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8523 /* Return the test that should be put into the flags user, i.e.
8524 the bcc, scc, or cmov instruction. */
8525 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8528 /* Figure out whether to use ordered or unordered fp comparisons.
8529 Return the appropriate mode to use. */
8532 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
8534 /* ??? In order to make all comparisons reversible, we do all comparisons
8535 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8536 all forms trapping and nontrapping comparisons, we can make inequality
8537 comparisons trapping again, since it results in better code when using
8538 FCOM based compares. */
8539 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8543 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
8545 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8546 return ix86_fp_compare_mode (code);
8549 /* Only zero flag is needed. */
8551 case NE: /* ZF!=0 */
8553 /* Codes needing carry flag. */
8554 case GEU: /* CF=0 */
8555 case GTU: /* CF=0 & ZF=0 */
8556 case LTU: /* CF=1 */
8557 case LEU: /* CF=1 | ZF=1 */
8559 /* Codes possibly doable only with sign flag when
8560 comparing against zero. */
8561 case GE: /* SF=OF or SF=0 */
8562 case LT: /* SF<>OF or SF=1 */
8563 if (op1 == const0_rtx)
8566 /* For other cases Carry flag is not required. */
8568 /* Codes doable only with sign flag when comparing
8569 against zero, but we miss jump instruction for it
8570 so we need to use relational tests against overflow
8571 that thus needs to be zero. */
8572 case GT: /* ZF=0 & SF=OF */
8573 case LE: /* ZF=1 | SF<>OF */
8574 if (op1 == const0_rtx)
8578 /* strcmp pattern do (use flags) and combine may ask us for proper
8587 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8590 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
8592 enum rtx_code swapped_code = swap_condition (code);
8593 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8594 || (ix86_fp_comparison_cost (swapped_code)
8595 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8598 /* Swap, force into registers, or otherwise massage the two operands
8599 to a fp comparison. The operands are updated in place; the new
8600 comparison code is returned. */
8602 static enum rtx_code
8603 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
8605 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8606 rtx op0 = *pop0, op1 = *pop1;
8607 enum machine_mode op_mode = GET_MODE (op0);
8608 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8610 /* All of the unordered compare instructions only work on registers.
8611 The same is true of the XFmode compare instructions. The same is
8612 true of the fcomi compare instructions. */
8615 && (fpcmp_mode == CCFPUmode
8616 || op_mode == XFmode
8617 || op_mode == TFmode
8618 || ix86_use_fcomi_compare (code)))
8620 op0 = force_reg (op_mode, op0);
8621 op1 = force_reg (op_mode, op1);
8625 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8626 things around if they appear profitable, otherwise force op0
8629 if (standard_80387_constant_p (op0) == 0
8630 || (GET_CODE (op0) == MEM
8631 && ! (standard_80387_constant_p (op1) == 0
8632 || GET_CODE (op1) == MEM)))
8635 tmp = op0, op0 = op1, op1 = tmp;
8636 code = swap_condition (code);
8639 if (GET_CODE (op0) != REG)
8640 op0 = force_reg (op_mode, op0);
8642 if (CONSTANT_P (op1))
8644 if (standard_80387_constant_p (op1))
8645 op1 = force_reg (op_mode, op1);
8647 op1 = validize_mem (force_const_mem (op_mode, op1));
8651 /* Try to rearrange the comparison to make it cheaper. */
8652 if (ix86_fp_comparison_cost (code)
8653 > ix86_fp_comparison_cost (swap_condition (code))
8654 && (GET_CODE (op1) == REG || !no_new_pseudos))
8657 tmp = op0, op0 = op1, op1 = tmp;
8658 code = swap_condition (code);
8659 if (GET_CODE (op0) != REG)
8660 op0 = force_reg (op_mode, op0);
8668 /* Convert comparison codes we use to represent FP comparison to integer
8669 code that will result in proper branch. Return UNKNOWN if no such code
8671 static enum rtx_code
8672 ix86_fp_compare_code_to_integer (enum rtx_code code)
8701 /* Split comparison code CODE into comparisons we can do using branch
8702 instructions. BYPASS_CODE is comparison code for branch that will
8703 branch around FIRST_CODE and SECOND_CODE. If some of branches
8704 is not required, set value to NIL.
8705 We never require more than two branches. */
8707 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
8708 enum rtx_code *first_code,
8709 enum rtx_code *second_code)
8715 /* The fcomi comparison sets flags as follows:
8725 case GT: /* GTU - CF=0 & ZF=0 */
8726 case GE: /* GEU - CF=0 */
8727 case ORDERED: /* PF=0 */
8728 case UNORDERED: /* PF=1 */
8729 case UNEQ: /* EQ - ZF=1 */
8730 case UNLT: /* LTU - CF=1 */
8731 case UNLE: /* LEU - CF=1 | ZF=1 */
8732 case LTGT: /* EQ - ZF=0 */
8734 case LT: /* LTU - CF=1 - fails on unordered */
8736 *bypass_code = UNORDERED;
8738 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8740 *bypass_code = UNORDERED;
8742 case EQ: /* EQ - ZF=1 - fails on unordered */
8744 *bypass_code = UNORDERED;
8746 case NE: /* NE - ZF=0 - fails on unordered */
8748 *second_code = UNORDERED;
8750 case UNGE: /* GEU - CF=0 - fails on unordered */
8752 *second_code = UNORDERED;
8754 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8756 *second_code = UNORDERED;
8761 if (!TARGET_IEEE_FP)
8768 /* Return cost of comparison done fcom + arithmetics operations on AX.
8769 All following functions do use number of instructions as a cost metrics.
8770 In future this should be tweaked to compute bytes for optimize_size and
8771 take into account performance of various instructions on various CPUs. */
8773 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
8775 if (!TARGET_IEEE_FP)
8777 /* The cost of code output by ix86_expand_fp_compare. */
8805 /* Return cost of comparison done using fcomi operation.
8806 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8808 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
8810 enum rtx_code bypass_code, first_code, second_code;
8811 /* Return arbitrarily high cost when instruction is not supported - this
8812 prevents gcc from using it. */
8815 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8816 return (bypass_code != NIL || second_code != NIL) + 2;
8819 /* Return cost of comparison done using sahf operation.
8820 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8822 ix86_fp_comparison_sahf_cost (enum rtx_code code)
8824 enum rtx_code bypass_code, first_code, second_code;
8825 /* Return arbitrarily high cost when instruction is not preferred - this
8826 avoids gcc from using it. */
8827 if (!TARGET_USE_SAHF && !optimize_size)
8829 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8830 return (bypass_code != NIL || second_code != NIL) + 3;
8833 /* Compute cost of the comparison done using any method.
8834 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8836 ix86_fp_comparison_cost (enum rtx_code code)
8838 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8841 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8842 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8844 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8845 if (min > sahf_cost)
8847 if (min > fcomi_cost)
8852 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8855 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
8856 rtx *second_test, rtx *bypass_test)
8858 enum machine_mode fpcmp_mode, intcmp_mode;
8860 int cost = ix86_fp_comparison_cost (code);
8861 enum rtx_code bypass_code, first_code, second_code;
8863 fpcmp_mode = ix86_fp_compare_mode (code);
8864 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8867 *second_test = NULL_RTX;
8869 *bypass_test = NULL_RTX;
8871 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8873 /* Do fcomi/sahf based test when profitable. */
8874 if ((bypass_code == NIL || bypass_test)
8875 && (second_code == NIL || second_test)
8876 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8880 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8881 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8887 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8888 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8890 scratch = gen_reg_rtx (HImode);
8891 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8892 emit_insn (gen_x86_sahf_1 (scratch));
8895 /* The FP codes work out to act like unsigned. */
8896 intcmp_mode = fpcmp_mode;
8898 if (bypass_code != NIL)
8899 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8900 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8902 if (second_code != NIL)
8903 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8904 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8909 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8910 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8911 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8913 scratch = gen_reg_rtx (HImode);
8914 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8916 /* In the unordered case, we have to check C2 for NaN's, which
8917 doesn't happen to work out to anything nice combination-wise.
8918 So do some bit twiddling on the value we've got in AH to come
8919 up with an appropriate set of condition codes. */
8921 intcmp_mode = CCNOmode;
8926 if (code == GT || !TARGET_IEEE_FP)
8928 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8933 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8934 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8935 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8936 intcmp_mode = CCmode;
8942 if (code == LT && TARGET_IEEE_FP)
8944 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8945 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8946 intcmp_mode = CCmode;
8951 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8957 if (code == GE || !TARGET_IEEE_FP)
8959 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8964 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8965 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8972 if (code == LE && TARGET_IEEE_FP)
8974 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8975 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8976 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8977 intcmp_mode = CCmode;
8982 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8988 if (code == EQ && TARGET_IEEE_FP)
8990 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8991 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8992 intcmp_mode = CCmode;
8997 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9004 if (code == NE && TARGET_IEEE_FP)
9006 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9007 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9013 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9019 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9023 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9032 /* Return the test that should be put into the flags user, i.e.
9033 the bcc, scc, or cmov instruction. */
9034 return gen_rtx_fmt_ee (code, VOIDmode,
9035 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9040 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
9043 op0 = ix86_compare_op0;
9044 op1 = ix86_compare_op1;
9047 *second_test = NULL_RTX;
9049 *bypass_test = NULL_RTX;
9051 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9052 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9053 second_test, bypass_test);
9055 ret = ix86_expand_int_compare (code, op0, op1);
9060 /* Return true if the CODE will result in nontrivial jump sequence. */
9062 ix86_fp_jump_nontrivial_p (enum rtx_code code)
9064 enum rtx_code bypass_code, first_code, second_code;
9067 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9068 return bypass_code != NIL || second_code != NIL;
9072 ix86_expand_branch (enum rtx_code code, rtx label)
9076 switch (GET_MODE (ix86_compare_op0))
9082 tmp = ix86_expand_compare (code, NULL, NULL);
9083 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9084 gen_rtx_LABEL_REF (VOIDmode, label),
9086 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9096 enum rtx_code bypass_code, first_code, second_code;
9098 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9101 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9103 /* Check whether we will use the natural sequence with one jump. If
9104 so, we can expand jump early. Otherwise delay expansion by
9105 creating compound insn to not confuse optimizers. */
9106 if (bypass_code == NIL && second_code == NIL
9109 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9110 gen_rtx_LABEL_REF (VOIDmode, label),
9115 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9116 ix86_compare_op0, ix86_compare_op1);
9117 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9118 gen_rtx_LABEL_REF (VOIDmode, label),
9120 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9122 use_fcomi = ix86_use_fcomi_compare (code);
9123 vec = rtvec_alloc (3 + !use_fcomi);
9124 RTVEC_ELT (vec, 0) = tmp;
9126 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9128 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9131 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9133 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9141 /* Expand DImode branch into multiple compare+branch. */
9143 rtx lo[2], hi[2], label2;
9144 enum rtx_code code1, code2, code3;
9146 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9148 tmp = ix86_compare_op0;
9149 ix86_compare_op0 = ix86_compare_op1;
9150 ix86_compare_op1 = tmp;
9151 code = swap_condition (code);
9153 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9154 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9156 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9157 avoid two branches. This costs one extra insn, so disable when
9158 optimizing for size. */
9160 if ((code == EQ || code == NE)
9162 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9167 if (hi[1] != const0_rtx)
9168 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9169 NULL_RTX, 0, OPTAB_WIDEN);
9172 if (lo[1] != const0_rtx)
9173 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9174 NULL_RTX, 0, OPTAB_WIDEN);
9176 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9177 NULL_RTX, 0, OPTAB_WIDEN);
9179 ix86_compare_op0 = tmp;
9180 ix86_compare_op1 = const0_rtx;
9181 ix86_expand_branch (code, label);
9185 /* Otherwise, if we are doing less-than or greater-or-equal-than,
9186 op1 is a constant and the low word is zero, then we can just
9187 examine the high word. */
9189 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9192 case LT: case LTU: case GE: case GEU:
9193 ix86_compare_op0 = hi[0];
9194 ix86_compare_op1 = hi[1];
9195 ix86_expand_branch (code, label);
9201 /* Otherwise, we need two or three jumps. */
9203 label2 = gen_label_rtx ();
9206 code2 = swap_condition (code);
9207 code3 = unsigned_condition (code);
9211 case LT: case GT: case LTU: case GTU:
9214 case LE: code1 = LT; code2 = GT; break;
9215 case GE: code1 = GT; code2 = LT; break;
9216 case LEU: code1 = LTU; code2 = GTU; break;
9217 case GEU: code1 = GTU; code2 = LTU; break;
9219 case EQ: code1 = NIL; code2 = NE; break;
9220 case NE: code2 = NIL; break;
9228 * if (hi(a) < hi(b)) goto true;
9229 * if (hi(a) > hi(b)) goto false;
9230 * if (lo(a) < lo(b)) goto true;
9234 ix86_compare_op0 = hi[0];
9235 ix86_compare_op1 = hi[1];
9238 ix86_expand_branch (code1, label);
9240 ix86_expand_branch (code2, label2);
9242 ix86_compare_op0 = lo[0];
9243 ix86_compare_op1 = lo[1];
9244 ix86_expand_branch (code3, label);
9247 emit_label (label2);
9256 /* Split branch based on floating point condition. */
9258 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
9259 rtx target1, rtx target2, rtx tmp)
9262 rtx label = NULL_RTX;
9264 int bypass_probability = -1, second_probability = -1, probability = -1;
9267 if (target2 != pc_rtx)
9270 code = reverse_condition_maybe_unordered (code);
9275 condition = ix86_expand_fp_compare (code, op1, op2,
9276 tmp, &second, &bypass);
9278 if (split_branch_probability >= 0)
9280 /* Distribute the probabilities across the jumps.
9281 Assume the BYPASS and SECOND to be always test
9283 probability = split_branch_probability;
9285 /* Value of 1 is low enough to make no need for probability
9286 to be updated. Later we may run some experiments and see
9287 if unordered values are more frequent in practice. */
9289 bypass_probability = 1;
9291 second_probability = 1;
9293 if (bypass != NULL_RTX)
9295 label = gen_label_rtx ();
9296 i = emit_jump_insn (gen_rtx_SET
9298 gen_rtx_IF_THEN_ELSE (VOIDmode,
9300 gen_rtx_LABEL_REF (VOIDmode,
9303 if (bypass_probability >= 0)
9305 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9306 GEN_INT (bypass_probability),
9309 i = emit_jump_insn (gen_rtx_SET
9311 gen_rtx_IF_THEN_ELSE (VOIDmode,
9312 condition, target1, target2)));
9313 if (probability >= 0)
9315 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9316 GEN_INT (probability),
9318 if (second != NULL_RTX)
9320 i = emit_jump_insn (gen_rtx_SET
9322 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9324 if (second_probability >= 0)
9326 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9327 GEN_INT (second_probability),
9330 if (label != NULL_RTX)
9335 ix86_expand_setcc (enum rtx_code code, rtx dest)
9337 rtx ret, tmp, tmpreg;
9338 rtx second_test, bypass_test;
9340 if (GET_MODE (ix86_compare_op0) == DImode
9342 return 0; /* FAIL */
9344 if (GET_MODE (dest) != QImode)
9347 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9348 PUT_MODE (ret, QImode);
9353 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9354 if (bypass_test || second_test)
9356 rtx test = second_test;
9358 rtx tmp2 = gen_reg_rtx (QImode);
9365 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9367 PUT_MODE (test, QImode);
9368 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9371 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9373 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9376 return 1; /* DONE */
9379 /* Expand comparison setting or clearing carry flag. Return true when successful
9380 and set pop for the operation. */
9382 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
9384 enum machine_mode mode =
9385 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9387 /* Do not handle DImode compares that go trought special path. Also we can't
9388 deal with FP compares yet. This is possible to add. */
9389 if ((mode == DImode && !TARGET_64BIT))
9391 if (FLOAT_MODE_P (mode))
9393 rtx second_test = NULL, bypass_test = NULL;
9394 rtx compare_op, compare_seq;
9396 /* Shortcut: following common codes never translate into carry flag compares. */
9397 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9398 || code == ORDERED || code == UNORDERED)
9401 /* These comparisons require zero flag; swap operands so they won't. */
9402 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9408 code = swap_condition (code);
9411 /* Try to expand the comparison and verify that we end up with carry flag
9412 based comparison. This is fails to be true only when we decide to expand
9413 comparison using arithmetic that is not too common scenario. */
9415 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9416 &second_test, &bypass_test);
9417 compare_seq = get_insns ();
9420 if (second_test || bypass_test)
9422 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9423 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9424 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9426 code = GET_CODE (compare_op);
9427 if (code != LTU && code != GEU)
9429 emit_insn (compare_seq);
9433 if (!INTEGRAL_MODE_P (mode))
9441 /* Convert a==0 into (unsigned)a<1. */
9444 if (op1 != const0_rtx)
9447 code = (code == EQ ? LTU : GEU);
9450 /* Convert a>b into b<a or a>=b-1. */
9453 if (GET_CODE (op1) == CONST_INT)
9455 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9456 /* Bail out on overflow. We still can swap operands but that
9457 would force loading of the constant into register. */
9458 if (op1 == const0_rtx
9459 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9461 code = (code == GTU ? GEU : LTU);
9468 code = (code == GTU ? LTU : GEU);
9472 /* Convert a>=0 into (unsigned)a<0x80000000. */
9475 if (mode == DImode || op1 != const0_rtx)
9477 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9478 code = (code == LT ? GEU : LTU);
9482 if (mode == DImode || op1 != constm1_rtx)
9484 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9485 code = (code == LE ? GEU : LTU);
9491 /* Swapping operands may cause constant to appear as first operand. */
9492 if (!nonimmediate_operand (op0, VOIDmode))
9496 op0 = force_reg (mode, op0);
9498 ix86_compare_op0 = op0;
9499 ix86_compare_op1 = op1;
9500 *pop = ix86_expand_compare (code, NULL, NULL);
9501 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9507 ix86_expand_int_movcc (rtx operands[])
9509 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9510 rtx compare_seq, compare_op;
9511 rtx second_test, bypass_test;
9512 enum machine_mode mode = GET_MODE (operands[0]);
9513 bool sign_bit_compare_p = false;;
9516 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9517 compare_seq = get_insns ();
9520 compare_code = GET_CODE (compare_op);
9522 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9523 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9524 sign_bit_compare_p = true;
9526 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9527 HImode insns, we'd be swallowed in word prefix ops. */
9529 if ((mode != HImode || TARGET_FAST_PREFIX)
9530 && (mode != DImode || TARGET_64BIT)
9531 && GET_CODE (operands[2]) == CONST_INT
9532 && GET_CODE (operands[3]) == CONST_INT)
9534 rtx out = operands[0];
9535 HOST_WIDE_INT ct = INTVAL (operands[2]);
9536 HOST_WIDE_INT cf = INTVAL (operands[3]);
9540 /* Sign bit compares are better done using shifts than we do by using
9542 if (sign_bit_compare_p
9543 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9544 ix86_compare_op1, &compare_op))
9546 /* Detect overlap between destination and compare sources. */
9549 if (!sign_bit_compare_p)
9553 compare_code = GET_CODE (compare_op);
9555 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9556 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9559 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9562 /* To simplify rest of code, restrict to the GEU case. */
9563 if (compare_code == LTU)
9565 HOST_WIDE_INT tmp = ct;
9568 compare_code = reverse_condition (compare_code);
9569 code = reverse_condition (code);
9574 PUT_CODE (compare_op,
9575 reverse_condition_maybe_unordered
9576 (GET_CODE (compare_op)));
9578 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9582 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9583 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9584 tmp = gen_reg_rtx (mode);
9587 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9589 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9593 if (code == GT || code == GE)
9594 code = reverse_condition (code);
9597 HOST_WIDE_INT tmp = ct;
9602 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9603 ix86_compare_op1, VOIDmode, 0, -1);
9616 tmp = expand_simple_binop (mode, PLUS,
9618 copy_rtx (tmp), 1, OPTAB_DIRECT);
9629 tmp = expand_simple_binop (mode, IOR,
9631 copy_rtx (tmp), 1, OPTAB_DIRECT);
9633 else if (diff == -1 && ct)
9643 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9645 tmp = expand_simple_binop (mode, PLUS,
9646 copy_rtx (tmp), GEN_INT (cf),
9647 copy_rtx (tmp), 1, OPTAB_DIRECT);
9655 * andl cf - ct, dest
9665 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9668 tmp = expand_simple_binop (mode, AND,
9670 gen_int_mode (cf - ct, mode),
9671 copy_rtx (tmp), 1, OPTAB_DIRECT);
9673 tmp = expand_simple_binop (mode, PLUS,
9674 copy_rtx (tmp), GEN_INT (ct),
9675 copy_rtx (tmp), 1, OPTAB_DIRECT);
9678 if (!rtx_equal_p (tmp, out))
9679 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9681 return 1; /* DONE */
9687 tmp = ct, ct = cf, cf = tmp;
9689 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9691 /* We may be reversing unordered compare to normal compare, that
9692 is not valid in general (we may convert non-trapping condition
9693 to trapping one), however on i386 we currently emit all
9694 comparisons unordered. */
9695 compare_code = reverse_condition_maybe_unordered (compare_code);
9696 code = reverse_condition_maybe_unordered (code);
9700 compare_code = reverse_condition (compare_code);
9701 code = reverse_condition (code);
9706 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9707 && GET_CODE (ix86_compare_op1) == CONST_INT)
9709 if (ix86_compare_op1 == const0_rtx
9710 && (code == LT || code == GE))
9711 compare_code = code;
9712 else if (ix86_compare_op1 == constm1_rtx)
9716 else if (code == GT)
9721 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9722 if (compare_code != NIL
9723 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9724 && (cf == -1 || ct == -1))
9726 /* If lea code below could be used, only optimize
9727 if it results in a 2 insn sequence. */
9729 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9730 || diff == 3 || diff == 5 || diff == 9)
9731 || (compare_code == LT && ct == -1)
9732 || (compare_code == GE && cf == -1))
9735 * notl op1 (if necessary)
9743 code = reverse_condition (code);
9746 out = emit_store_flag (out, code, ix86_compare_op0,
9747 ix86_compare_op1, VOIDmode, 0, -1);
9749 out = expand_simple_binop (mode, IOR,
9751 out, 1, OPTAB_DIRECT);
9752 if (out != operands[0])
9753 emit_move_insn (operands[0], out);
9755 return 1; /* DONE */
9760 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9761 || diff == 3 || diff == 5 || diff == 9)
9762 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9763 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
9769 * lea cf(dest*(ct-cf)),dest
9773 * This also catches the degenerate setcc-only case.
9779 out = emit_store_flag (out, code, ix86_compare_op0,
9780 ix86_compare_op1, VOIDmode, 0, 1);
9783 /* On x86_64 the lea instruction operates on Pmode, so we need
9784 to get arithmetics done in proper mode to match. */
9786 tmp = copy_rtx (out);
9790 out1 = copy_rtx (out);
9791 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9795 tmp = gen_rtx_PLUS (mode, tmp, out1);
9801 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9804 if (!rtx_equal_p (tmp, out))
9807 out = force_operand (tmp, copy_rtx (out));
9809 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
9811 if (!rtx_equal_p (out, operands[0]))
9812 emit_move_insn (operands[0], copy_rtx (out));
9814 return 1; /* DONE */
9818 * General case: Jumpful:
9819 * xorl dest,dest cmpl op1, op2
9820 * cmpl op1, op2 movl ct, dest
9822 * decl dest movl cf, dest
9823 * andl (cf-ct),dest 1:
9828 * This is reasonably steep, but branch mispredict costs are
9829 * high on modern cpus, so consider failing only if optimizing
9833 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9834 && BRANCH_COST >= 2)
9840 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9841 /* We may be reversing unordered compare to normal compare,
9842 that is not valid in general (we may convert non-trapping
9843 condition to trapping one), however on i386 we currently
9844 emit all comparisons unordered. */
9845 code = reverse_condition_maybe_unordered (code);
9848 code = reverse_condition (code);
9849 if (compare_code != NIL)
9850 compare_code = reverse_condition (compare_code);
9854 if (compare_code != NIL)
9856 /* notl op1 (if needed)
9861 For x < 0 (resp. x <= -1) there will be no notl,
9862 so if possible swap the constants to get rid of the
9864 True/false will be -1/0 while code below (store flag
9865 followed by decrement) is 0/-1, so the constants need
9866 to be exchanged once more. */
9868 if (compare_code == GE || !cf)
9870 code = reverse_condition (code);
9875 HOST_WIDE_INT tmp = cf;
9880 out = emit_store_flag (out, code, ix86_compare_op0,
9881 ix86_compare_op1, VOIDmode, 0, -1);
9885 out = emit_store_flag (out, code, ix86_compare_op0,
9886 ix86_compare_op1, VOIDmode, 0, 1);
9888 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
9889 copy_rtx (out), 1, OPTAB_DIRECT);
9892 out = expand_simple_binop (mode, AND, copy_rtx (out),
9893 gen_int_mode (cf - ct, mode),
9894 copy_rtx (out), 1, OPTAB_DIRECT);
9896 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
9897 copy_rtx (out), 1, OPTAB_DIRECT);
9898 if (!rtx_equal_p (out, operands[0]))
9899 emit_move_insn (operands[0], copy_rtx (out));
9901 return 1; /* DONE */
9905 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9907 /* Try a few things more with specific constants and a variable. */
9910 rtx var, orig_out, out, tmp;
9912 if (BRANCH_COST <= 2)
9913 return 0; /* FAIL */
9915 /* If one of the two operands is an interesting constant, load a
9916 constant with the above and mask it in with a logical operation. */
9918 if (GET_CODE (operands[2]) == CONST_INT)
9921 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
9922 operands[3] = constm1_rtx, op = and_optab;
9923 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
9924 operands[3] = const0_rtx, op = ior_optab;
9926 return 0; /* FAIL */
9928 else if (GET_CODE (operands[3]) == CONST_INT)
9931 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
9932 operands[2] = constm1_rtx, op = and_optab;
9933 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
9934 operands[2] = const0_rtx, op = ior_optab;
9936 return 0; /* FAIL */
9939 return 0; /* FAIL */
9941 orig_out = operands[0];
9942 tmp = gen_reg_rtx (mode);
9945 /* Recurse to get the constant loaded. */
9946 if (ix86_expand_int_movcc (operands) == 0)
9947 return 0; /* FAIL */
9949 /* Mask in the interesting variable. */
9950 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9952 if (!rtx_equal_p (out, orig_out))
9953 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
9955 return 1; /* DONE */
9959 * For comparison with above,
9969 if (! nonimmediate_operand (operands[2], mode))
9970 operands[2] = force_reg (mode, operands[2]);
9971 if (! nonimmediate_operand (operands[3], mode))
9972 operands[3] = force_reg (mode, operands[3]);
9974 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9976 rtx tmp = gen_reg_rtx (mode);
9977 emit_move_insn (tmp, operands[3]);
9980 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9982 rtx tmp = gen_reg_rtx (mode);
9983 emit_move_insn (tmp, operands[2]);
9987 if (! register_operand (operands[2], VOIDmode)
9989 || ! register_operand (operands[3], VOIDmode)))
9990 operands[2] = force_reg (mode, operands[2]);
9993 && ! register_operand (operands[3], VOIDmode))
9994 operands[3] = force_reg (mode, operands[3]);
9996 emit_insn (compare_seq);
9997 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9998 gen_rtx_IF_THEN_ELSE (mode,
9999 compare_op, operands[2],
10002 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10003 gen_rtx_IF_THEN_ELSE (mode,
10005 copy_rtx (operands[3]),
10006 copy_rtx (operands[0]))));
10008 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10009 gen_rtx_IF_THEN_ELSE (mode,
10011 copy_rtx (operands[2]),
10012 copy_rtx (operands[0]))));
10014 return 1; /* DONE */
10018 ix86_expand_fp_movcc (rtx operands[])
10020 enum rtx_code code;
10022 rtx compare_op, second_test, bypass_test;
10024 /* For SF/DFmode conditional moves based on comparisons
10025 in same mode, we may want to use SSE min/max instructions. */
10026 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
10027 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
10028 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
10029 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
10030 && (!TARGET_IEEE_FP
10031 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
10032 /* We may be called from the post-reload splitter. */
10033 && (!REG_P (operands[0])
10034 || SSE_REG_P (operands[0])
10035 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
10037 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
10038 code = GET_CODE (operands[1]);
10040 /* See if we have (cross) match between comparison operands and
10041 conditional move operands. */
10042 if (rtx_equal_p (operands[2], op1))
10047 code = reverse_condition_maybe_unordered (code);
10049 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
10051 /* Check for min operation. */
10052 if (code == LT || code == UNLE)
10060 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10061 if (memory_operand (op0, VOIDmode))
10062 op0 = force_reg (GET_MODE (operands[0]), op0);
10063 if (GET_MODE (operands[0]) == SFmode)
10064 emit_insn (gen_minsf3 (operands[0], op0, op1));
10066 emit_insn (gen_mindf3 (operands[0], op0, op1));
10069 /* Check for max operation. */
10070 if (code == GT || code == UNGE)
10078 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10079 if (memory_operand (op0, VOIDmode))
10080 op0 = force_reg (GET_MODE (operands[0]), op0);
10081 if (GET_MODE (operands[0]) == SFmode)
10082 emit_insn (gen_maxsf3 (operands[0], op0, op1));
10084 emit_insn (gen_maxdf3 (operands[0], op0, op1));
10088 /* Manage condition to be sse_comparison_operator. In case we are
10089 in non-ieee mode, try to canonicalize the destination operand
10090 to be first in the comparison - this helps reload to avoid extra
10092 if (!sse_comparison_operator (operands[1], VOIDmode)
10093 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
10095 rtx tmp = ix86_compare_op0;
10096 ix86_compare_op0 = ix86_compare_op1;
10097 ix86_compare_op1 = tmp;
10098 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
10099 VOIDmode, ix86_compare_op0,
10102 /* Similarly try to manage result to be first operand of conditional
10103 move. We also don't support the NE comparison on SSE, so try to
10105 if ((rtx_equal_p (operands[0], operands[3])
10106 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
10107 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
10109 rtx tmp = operands[2];
10110 operands[2] = operands[3];
10112 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10113 (GET_CODE (operands[1])),
10114 VOIDmode, ix86_compare_op0,
10117 if (GET_MODE (operands[0]) == SFmode)
10118 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
10119 operands[2], operands[3],
10120 ix86_compare_op0, ix86_compare_op1));
10122 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
10123 operands[2], operands[3],
10124 ix86_compare_op0, ix86_compare_op1));
10128 /* The floating point conditional move instructions don't directly
10129 support conditions resulting from a signed integer comparison. */
10131 code = GET_CODE (operands[1]);
10132 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10134 /* The floating point conditional move instructions don't directly
10135 support signed integer comparisons. */
10137 if (!fcmov_comparison_operator (compare_op, VOIDmode))
10139 if (second_test != NULL || bypass_test != NULL)
10141 tmp = gen_reg_rtx (QImode);
10142 ix86_expand_setcc (code, tmp);
10144 ix86_compare_op0 = tmp;
10145 ix86_compare_op1 = const0_rtx;
10146 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10148 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10150 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10151 emit_move_insn (tmp, operands[3]);
10154 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10156 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10157 emit_move_insn (tmp, operands[2]);
10161 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10162 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10167 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10168 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10173 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10174 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10182 /* Expand conditional increment or decrement using adb/sbb instructions.
10183 The default case using setcc followed by the conditional move can be
10184 done by generic code. */
10186 ix86_expand_int_addcc (rtx operands[])
10188 enum rtx_code code = GET_CODE (operands[1]);
10190 rtx val = const0_rtx;
10191 bool fpcmp = false;
10192 enum machine_mode mode = GET_MODE (operands[0]);
10194 if (operands[3] != const1_rtx
10195 && operands[3] != constm1_rtx)
10197 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10198 ix86_compare_op1, &compare_op))
10200 code = GET_CODE (compare_op);
10202 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10203 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10206 code = ix86_fp_compare_code_to_integer (code);
10213 PUT_CODE (compare_op,
10214 reverse_condition_maybe_unordered
10215 (GET_CODE (compare_op)));
10217 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10219 PUT_MODE (compare_op, mode);
10221 /* Construct either adc or sbb insn. */
10222 if ((code == LTU) == (operands[3] == constm1_rtx))
10224 switch (GET_MODE (operands[0]))
10227 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10230 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10233 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10236 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10244 switch (GET_MODE (operands[0]))
10247 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10250 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10253 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10256 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10262 return 1; /* DONE */
10266 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10267 works for floating pointer parameters and nonoffsetable memories.
10268 For pushes, it returns just stack offsets; the values will be saved
10269 in the right order. Maximally three parts are generated. */
10272 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
10277 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
10279 size = (GET_MODE_SIZE (mode) + 4) / 8;
10281 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10283 if (size < 2 || size > 3)
10286 /* Optimize constant pool reference to immediates. This is used by fp
10287 moves, that force all constants to memory to allow combining. */
10288 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10290 rtx tmp = maybe_get_pool_constant (operand);
10295 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10297 /* The only non-offsetable memories we handle are pushes. */
10298 if (! push_operand (operand, VOIDmode))
10301 operand = copy_rtx (operand);
10302 PUT_MODE (operand, Pmode);
10303 parts[0] = parts[1] = parts[2] = operand;
10305 else if (!TARGET_64BIT)
10307 if (mode == DImode)
10308 split_di (&operand, 1, &parts[0], &parts[1]);
10311 if (REG_P (operand))
10313 if (!reload_completed)
10315 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10316 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10318 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10320 else if (offsettable_memref_p (operand))
10322 operand = adjust_address (operand, SImode, 0);
10323 parts[0] = operand;
10324 parts[1] = adjust_address (operand, SImode, 4);
10326 parts[2] = adjust_address (operand, SImode, 8);
10328 else if (GET_CODE (operand) == CONST_DOUBLE)
10333 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10338 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10339 parts[2] = gen_int_mode (l[2], SImode);
10342 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10347 parts[1] = gen_int_mode (l[1], SImode);
10348 parts[0] = gen_int_mode (l[0], SImode);
10356 if (mode == TImode)
10357 split_ti (&operand, 1, &parts[0], &parts[1]);
10358 if (mode == XFmode || mode == TFmode)
10360 if (REG_P (operand))
10362 if (!reload_completed)
10364 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10365 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10367 else if (offsettable_memref_p (operand))
10369 operand = adjust_address (operand, DImode, 0);
10370 parts[0] = operand;
10371 parts[1] = adjust_address (operand, SImode, 8);
10373 else if (GET_CODE (operand) == CONST_DOUBLE)
10378 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10379 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10380 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10381 if (HOST_BITS_PER_WIDE_INT >= 64)
10384 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10385 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10388 parts[0] = immed_double_const (l[0], l[1], DImode);
10389 parts[1] = gen_int_mode (l[2], SImode);
10399 /* Emit insns to perform a move or push of DI, DF, and XF values.
10400 Return false when normal moves are needed; true when all required
10401 insns have been emitted. Operands 2-4 contain the input values
10402 int the correct order; operands 5-7 contain the output values. */
10405 ix86_split_long_move (rtx operands[])
10410 int collisions = 0;
10411 enum machine_mode mode = GET_MODE (operands[0]);
10413 /* The DFmode expanders may ask us to move double.
10414 For 64bit target this is single move. By hiding the fact
10415 here we simplify i386.md splitters. */
10416 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10418 /* Optimize constant pool reference to immediates. This is used by
10419 fp moves, that force all constants to memory to allow combining. */
10421 if (GET_CODE (operands[1]) == MEM
10422 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10423 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10424 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10425 if (push_operand (operands[0], VOIDmode))
10427 operands[0] = copy_rtx (operands[0]);
10428 PUT_MODE (operands[0], Pmode);
10431 operands[0] = gen_lowpart (DImode, operands[0]);
10432 operands[1] = gen_lowpart (DImode, operands[1]);
10433 emit_move_insn (operands[0], operands[1]);
10437 /* The only non-offsettable memory we handle is push. */
10438 if (push_operand (operands[0], VOIDmode))
10440 else if (GET_CODE (operands[0]) == MEM
10441 && ! offsettable_memref_p (operands[0]))
10444 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10445 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10447 /* When emitting push, take care for source operands on the stack. */
10448 if (push && GET_CODE (operands[1]) == MEM
10449 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10452 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10453 XEXP (part[1][2], 0));
10454 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10455 XEXP (part[1][1], 0));
10458 /* We need to do copy in the right order in case an address register
10459 of the source overlaps the destination. */
10460 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10462 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10464 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10467 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10470 /* Collision in the middle part can be handled by reordering. */
10471 if (collisions == 1 && nparts == 3
10472 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10475 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10476 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10479 /* If there are more collisions, we can't handle it by reordering.
10480 Do an lea to the last part and use only one colliding move. */
10481 else if (collisions > 1)
10487 base = part[0][nparts - 1];
10489 /* Handle the case when the last part isn't valid for lea.
10490 Happens in 64-bit mode storing the 12-byte XFmode. */
10491 if (GET_MODE (base) != Pmode)
10492 base = gen_rtx_REG (Pmode, REGNO (base));
10494 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10495 part[1][0] = replace_equiv_address (part[1][0], base);
10496 part[1][1] = replace_equiv_address (part[1][1],
10497 plus_constant (base, UNITS_PER_WORD));
10499 part[1][2] = replace_equiv_address (part[1][2],
10500 plus_constant (base, 8));
10510 /* We use only first 12 bytes of TFmode value, but for pushing we
10511 are required to adjust stack as if we were pushing real 16byte
10513 if (mode == TFmode && !TARGET_64BIT)
10514 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
10516 emit_move_insn (part[0][2], part[1][2]);
10521 /* In 64bit mode we don't have 32bit push available. In case this is
10522 register, it is OK - we will just use larger counterpart. We also
10523 retype memory - these comes from attempt to avoid REX prefix on
10524 moving of second half of TFmode value. */
10525 if (GET_MODE (part[1][1]) == SImode)
10527 if (GET_CODE (part[1][1]) == MEM)
10528 part[1][1] = adjust_address (part[1][1], DImode, 0);
10529 else if (REG_P (part[1][1]))
10530 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10533 if (GET_MODE (part[1][0]) == SImode)
10534 part[1][0] = part[1][1];
10537 emit_move_insn (part[0][1], part[1][1]);
10538 emit_move_insn (part[0][0], part[1][0]);
10542 /* Choose correct order to not overwrite the source before it is copied. */
10543 if ((REG_P (part[0][0])
10544 && REG_P (part[1][1])
10545 && (REGNO (part[0][0]) == REGNO (part[1][1])
10547 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10549 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10553 operands[2] = part[0][2];
10554 operands[3] = part[0][1];
10555 operands[4] = part[0][0];
10556 operands[5] = part[1][2];
10557 operands[6] = part[1][1];
10558 operands[7] = part[1][0];
10562 operands[2] = part[0][1];
10563 operands[3] = part[0][0];
10564 operands[5] = part[1][1];
10565 operands[6] = part[1][0];
10572 operands[2] = part[0][0];
10573 operands[3] = part[0][1];
10574 operands[4] = part[0][2];
10575 operands[5] = part[1][0];
10576 operands[6] = part[1][1];
10577 operands[7] = part[1][2];
10581 operands[2] = part[0][0];
10582 operands[3] = part[0][1];
10583 operands[5] = part[1][0];
10584 operands[6] = part[1][1];
10587 emit_move_insn (operands[2], operands[5]);
10588 emit_move_insn (operands[3], operands[6]);
10590 emit_move_insn (operands[4], operands[7]);
10596 ix86_split_ashldi (rtx *operands, rtx scratch)
10598 rtx low[2], high[2];
10601 if (GET_CODE (operands[2]) == CONST_INT)
10603 split_di (operands, 2, low, high);
10604 count = INTVAL (operands[2]) & 63;
10608 emit_move_insn (high[0], low[1]);
10609 emit_move_insn (low[0], const0_rtx);
10612 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10616 if (!rtx_equal_p (operands[0], operands[1]))
10617 emit_move_insn (operands[0], operands[1]);
10618 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10619 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10624 if (!rtx_equal_p (operands[0], operands[1]))
10625 emit_move_insn (operands[0], operands[1]);
10627 split_di (operands, 1, low, high);
10629 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10630 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10632 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10634 if (! no_new_pseudos)
10635 scratch = force_reg (SImode, const0_rtx);
10637 emit_move_insn (scratch, const0_rtx);
10639 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10643 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10648 ix86_split_ashrdi (rtx *operands, rtx scratch)
10650 rtx low[2], high[2];
10653 if (GET_CODE (operands[2]) == CONST_INT)
10655 split_di (operands, 2, low, high);
10656 count = INTVAL (operands[2]) & 63;
10660 emit_move_insn (low[0], high[1]);
10662 if (! reload_completed)
10663 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10666 emit_move_insn (high[0], low[0]);
10667 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10671 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10675 if (!rtx_equal_p (operands[0], operands[1]))
10676 emit_move_insn (operands[0], operands[1]);
10677 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10678 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10683 if (!rtx_equal_p (operands[0], operands[1]))
10684 emit_move_insn (operands[0], operands[1]);
10686 split_di (operands, 1, low, high);
10688 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10689 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10691 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10693 if (! no_new_pseudos)
10694 scratch = gen_reg_rtx (SImode);
10695 emit_move_insn (scratch, high[0]);
10696 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10697 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10701 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10706 ix86_split_lshrdi (rtx *operands, rtx scratch)
10708 rtx low[2], high[2];
10711 if (GET_CODE (operands[2]) == CONST_INT)
10713 split_di (operands, 2, low, high);
10714 count = INTVAL (operands[2]) & 63;
10718 emit_move_insn (low[0], high[1]);
10719 emit_move_insn (high[0], const0_rtx);
10722 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10726 if (!rtx_equal_p (operands[0], operands[1]))
10727 emit_move_insn (operands[0], operands[1]);
10728 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10729 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10734 if (!rtx_equal_p (operands[0], operands[1]))
10735 emit_move_insn (operands[0], operands[1]);
10737 split_di (operands, 1, low, high);
10739 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10740 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10742 /* Heh. By reversing the arguments, we can reuse this pattern. */
10743 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10745 if (! no_new_pseudos)
10746 scratch = force_reg (SImode, const0_rtx);
10748 emit_move_insn (scratch, const0_rtx);
10750 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10754 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10758 /* Helper function for the string operations below. Dest VARIABLE whether
10759 it is aligned to VALUE bytes. If true, jump to the label. */
10761 ix86_expand_aligntest (rtx variable, int value)
10763 rtx label = gen_label_rtx ();
10764 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10765 if (GET_MODE (variable) == DImode)
10766 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10768 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10769 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10774 /* Adjust COUNTER by the VALUE. */
10776 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
10778 if (GET_MODE (countreg) == DImode)
10779 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10781 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10784 /* Zero extend possibly SImode EXP to Pmode register. */
10786 ix86_zero_extend_to_Pmode (rtx exp)
10789 if (GET_MODE (exp) == VOIDmode)
10790 return force_reg (Pmode, exp);
10791 if (GET_MODE (exp) == Pmode)
10792 return copy_to_mode_reg (Pmode, exp);
10793 r = gen_reg_rtx (Pmode);
10794 emit_insn (gen_zero_extendsidi2 (r, exp));
10798 /* Expand string move (memcpy) operation. Use i386 string operations when
10799 profitable. expand_clrstr contains similar code. */
10801 ix86_expand_movstr (rtx dst, rtx src, rtx count_exp, rtx align_exp)
10803 rtx srcreg, destreg, countreg;
10804 enum machine_mode counter_mode;
10805 HOST_WIDE_INT align = 0;
10806 unsigned HOST_WIDE_INT count = 0;
10809 if (GET_CODE (align_exp) == CONST_INT)
10810 align = INTVAL (align_exp);
10812 /* Can't use any of this if the user has appropriated esi or edi. */
10813 if (global_regs[4] || global_regs[5])
10816 /* This simple hack avoids all inlining code and simplifies code below. */
10817 if (!TARGET_ALIGN_STRINGOPS)
10820 if (GET_CODE (count_exp) == CONST_INT)
10822 count = INTVAL (count_exp);
10823 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
10827 /* Figure out proper mode for counter. For 32bits it is always SImode,
10828 for 64bits use SImode when possible, otherwise DImode.
10829 Set count to number of bytes copied when known at compile time. */
10830 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10831 || x86_64_zero_extended_value (count_exp))
10832 counter_mode = SImode;
10834 counter_mode = DImode;
10838 if (counter_mode != SImode && counter_mode != DImode)
10841 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10842 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10844 emit_insn (gen_cld ());
10846 /* When optimizing for size emit simple rep ; movsb instruction for
10847 counts not divisible by 4. */
10849 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10851 countreg = ix86_zero_extend_to_Pmode (count_exp);
10853 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
10854 destreg, srcreg, countreg));
10856 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
10857 destreg, srcreg, countreg));
10860 /* For constant aligned (or small unaligned) copies use rep movsl
10861 followed by code copying the rest. For PentiumPro ensure 8 byte
10862 alignment to allow rep movsl acceleration. */
10864 else if (count != 0
10866 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10867 || optimize_size || count < (unsigned int) 64))
10869 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10870 if (count & ~(size - 1))
10872 countreg = copy_to_mode_reg (counter_mode,
10873 GEN_INT ((count >> (size == 4 ? 2 : 3))
10874 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10875 countreg = ix86_zero_extend_to_Pmode (countreg);
10879 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
10880 destreg, srcreg, countreg));
10882 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
10883 destreg, srcreg, countreg));
10886 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
10887 destreg, srcreg, countreg));
10889 if (size == 8 && (count & 0x04))
10890 emit_insn (gen_strmovsi (destreg, srcreg));
10892 emit_insn (gen_strmovhi (destreg, srcreg));
10894 emit_insn (gen_strmovqi (destreg, srcreg));
10896 /* The generic code based on the glibc implementation:
10897 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10898 allowing accelerated copying there)
10899 - copy the data using rep movsl
10900 - copy the rest. */
10905 int desired_alignment = (TARGET_PENTIUMPRO
10906 && (count == 0 || count >= (unsigned int) 260)
10907 ? 8 : UNITS_PER_WORD);
10909 /* In case we don't know anything about the alignment, default to
10910 library version, since it is usually equally fast and result in
10913 Also emit call when we know that the count is large and call overhead
10914 will not be important. */
10915 if (!TARGET_INLINE_ALL_STRINGOPS
10916 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
10922 if (TARGET_SINGLE_STRINGOP)
10923 emit_insn (gen_cld ());
10925 countreg2 = gen_reg_rtx (Pmode);
10926 countreg = copy_to_mode_reg (counter_mode, count_exp);
10928 /* We don't use loops to align destination and to copy parts smaller
10929 than 4 bytes, because gcc is able to optimize such code better (in
10930 the case the destination or the count really is aligned, gcc is often
10931 able to predict the branches) and also it is friendlier to the
10932 hardware branch prediction.
10934 Using loops is beneficial for generic case, because we can
10935 handle small counts using the loops. Many CPUs (such as Athlon)
10936 have large REP prefix setup costs.
10938 This is quite costly. Maybe we can revisit this decision later or
10939 add some customizability to this code. */
10941 if (count == 0 && align < desired_alignment)
10943 label = gen_label_rtx ();
10944 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10945 LEU, 0, counter_mode, 1, label);
10949 rtx label = ix86_expand_aligntest (destreg, 1);
10950 emit_insn (gen_strmovqi (destreg, srcreg));
10951 ix86_adjust_counter (countreg, 1);
10952 emit_label (label);
10953 LABEL_NUSES (label) = 1;
10957 rtx label = ix86_expand_aligntest (destreg, 2);
10958 emit_insn (gen_strmovhi (destreg, srcreg));
10959 ix86_adjust_counter (countreg, 2);
10960 emit_label (label);
10961 LABEL_NUSES (label) = 1;
10963 if (align <= 4 && desired_alignment > 4)
10965 rtx label = ix86_expand_aligntest (destreg, 4);
10966 emit_insn (gen_strmovsi (destreg, srcreg));
10967 ix86_adjust_counter (countreg, 4);
10968 emit_label (label);
10969 LABEL_NUSES (label) = 1;
10972 if (label && desired_alignment > 4 && !TARGET_64BIT)
10974 emit_label (label);
10975 LABEL_NUSES (label) = 1;
10978 if (!TARGET_SINGLE_STRINGOP)
10979 emit_insn (gen_cld ());
10982 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10984 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
10985 destreg, srcreg, countreg2));
10989 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10990 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
10991 destreg, srcreg, countreg2));
10996 emit_label (label);
10997 LABEL_NUSES (label) = 1;
10999 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11000 emit_insn (gen_strmovsi (destreg, srcreg));
11001 if ((align <= 4 || count == 0) && TARGET_64BIT)
11003 rtx label = ix86_expand_aligntest (countreg, 4);
11004 emit_insn (gen_strmovsi (destreg, srcreg));
11005 emit_label (label);
11006 LABEL_NUSES (label) = 1;
11008 if (align > 2 && count != 0 && (count & 2))
11009 emit_insn (gen_strmovhi (destreg, srcreg));
11010 if (align <= 2 || count == 0)
11012 rtx label = ix86_expand_aligntest (countreg, 2);
11013 emit_insn (gen_strmovhi (destreg, srcreg));
11014 emit_label (label);
11015 LABEL_NUSES (label) = 1;
11017 if (align > 1 && count != 0 && (count & 1))
11018 emit_insn (gen_strmovqi (destreg, srcreg));
11019 if (align <= 1 || count == 0)
11021 rtx label = ix86_expand_aligntest (countreg, 1);
11022 emit_insn (gen_strmovqi (destreg, srcreg));
11023 emit_label (label);
11024 LABEL_NUSES (label) = 1;
11028 insns = get_insns ();
11031 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
11036 /* Expand string clear operation (bzero). Use i386 string operations when
11037 profitable. expand_movstr contains similar code. */
11039 ix86_expand_clrstr (rtx src, rtx count_exp, rtx align_exp)
11041 rtx destreg, zeroreg, countreg;
11042 enum machine_mode counter_mode;
11043 HOST_WIDE_INT align = 0;
11044 unsigned HOST_WIDE_INT count = 0;
11046 if (GET_CODE (align_exp) == CONST_INT)
11047 align = INTVAL (align_exp);
11049 /* Can't use any of this if the user has appropriated esi. */
11050 if (global_regs[4])
11053 /* This simple hack avoids all inlining code and simplifies code below. */
11054 if (!TARGET_ALIGN_STRINGOPS)
11057 if (GET_CODE (count_exp) == CONST_INT)
11059 count = INTVAL (count_exp);
11060 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11063 /* Figure out proper mode for counter. For 32bits it is always SImode,
11064 for 64bits use SImode when possible, otherwise DImode.
11065 Set count to number of bytes copied when known at compile time. */
11066 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11067 || x86_64_zero_extended_value (count_exp))
11068 counter_mode = SImode;
11070 counter_mode = DImode;
11072 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11074 emit_insn (gen_cld ());
11076 /* When optimizing for size emit simple rep ; movsb instruction for
11077 counts not divisible by 4. */
11079 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11081 countreg = ix86_zero_extend_to_Pmode (count_exp);
11082 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11084 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
11085 destreg, countreg));
11087 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
11088 destreg, countreg));
11090 else if (count != 0
11092 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11093 || optimize_size || count < (unsigned int) 64))
11095 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11096 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11097 if (count & ~(size - 1))
11099 countreg = copy_to_mode_reg (counter_mode,
11100 GEN_INT ((count >> (size == 4 ? 2 : 3))
11101 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11102 countreg = ix86_zero_extend_to_Pmode (countreg);
11106 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
11107 destreg, countreg));
11109 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
11110 destreg, countreg));
11113 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
11114 destreg, countreg));
11116 if (size == 8 && (count & 0x04))
11117 emit_insn (gen_strsetsi (destreg,
11118 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11120 emit_insn (gen_strsethi (destreg,
11121 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11123 emit_insn (gen_strsetqi (destreg,
11124 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11130 /* Compute desired alignment of the string operation. */
11131 int desired_alignment = (TARGET_PENTIUMPRO
11132 && (count == 0 || count >= (unsigned int) 260)
11133 ? 8 : UNITS_PER_WORD);
11135 /* In case we don't know anything about the alignment, default to
11136 library version, since it is usually equally fast and result in
11139 Also emit call when we know that the count is large and call overhead
11140 will not be important. */
11141 if (!TARGET_INLINE_ALL_STRINGOPS
11142 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11145 if (TARGET_SINGLE_STRINGOP)
11146 emit_insn (gen_cld ());
11148 countreg2 = gen_reg_rtx (Pmode);
11149 countreg = copy_to_mode_reg (counter_mode, count_exp);
11150 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11152 if (count == 0 && align < desired_alignment)
11154 label = gen_label_rtx ();
11155 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11156 LEU, 0, counter_mode, 1, label);
11160 rtx label = ix86_expand_aligntest (destreg, 1);
11161 emit_insn (gen_strsetqi (destreg,
11162 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11163 ix86_adjust_counter (countreg, 1);
11164 emit_label (label);
11165 LABEL_NUSES (label) = 1;
11169 rtx label = ix86_expand_aligntest (destreg, 2);
11170 emit_insn (gen_strsethi (destreg,
11171 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11172 ix86_adjust_counter (countreg, 2);
11173 emit_label (label);
11174 LABEL_NUSES (label) = 1;
11176 if (align <= 4 && desired_alignment > 4)
11178 rtx label = ix86_expand_aligntest (destreg, 4);
11179 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
11180 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11182 ix86_adjust_counter (countreg, 4);
11183 emit_label (label);
11184 LABEL_NUSES (label) = 1;
11187 if (label && desired_alignment > 4 && !TARGET_64BIT)
11189 emit_label (label);
11190 LABEL_NUSES (label) = 1;
11194 if (!TARGET_SINGLE_STRINGOP)
11195 emit_insn (gen_cld ());
11198 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11200 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
11201 destreg, countreg2));
11205 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
11206 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
11207 destreg, countreg2));
11211 emit_label (label);
11212 LABEL_NUSES (label) = 1;
11215 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11216 emit_insn (gen_strsetsi (destreg,
11217 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11218 if (TARGET_64BIT && (align <= 4 || count == 0))
11220 rtx label = ix86_expand_aligntest (countreg, 4);
11221 emit_insn (gen_strsetsi (destreg,
11222 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11223 emit_label (label);
11224 LABEL_NUSES (label) = 1;
11226 if (align > 2 && count != 0 && (count & 2))
11227 emit_insn (gen_strsethi (destreg,
11228 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11229 if (align <= 2 || count == 0)
11231 rtx label = ix86_expand_aligntest (countreg, 2);
11232 emit_insn (gen_strsethi (destreg,
11233 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11234 emit_label (label);
11235 LABEL_NUSES (label) = 1;
11237 if (align > 1 && count != 0 && (count & 1))
11238 emit_insn (gen_strsetqi (destreg,
11239 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11240 if (align <= 1 || count == 0)
11242 rtx label = ix86_expand_aligntest (countreg, 1);
11243 emit_insn (gen_strsetqi (destreg,
11244 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11245 emit_label (label);
11246 LABEL_NUSES (label) = 1;
11251 /* Expand strlen. */
11253 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
11255 rtx addr, scratch1, scratch2, scratch3, scratch4;
11257 /* The generic case of strlen expander is long. Avoid it's
11258 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
11260 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11261 && !TARGET_INLINE_ALL_STRINGOPS
11263 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11266 addr = force_reg (Pmode, XEXP (src, 0));
11267 scratch1 = gen_reg_rtx (Pmode);
11269 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11272 /* Well it seems that some optimizer does not combine a call like
11273 foo(strlen(bar), strlen(bar));
11274 when the move and the subtraction is done here. It does calculate
11275 the length just once when these instructions are done inside of
11276 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
11277 often used and I use one fewer register for the lifetime of
11278 output_strlen_unroll() this is better. */
11280 emit_move_insn (out, addr);
11282 ix86_expand_strlensi_unroll_1 (out, align);
11284 /* strlensi_unroll_1 returns the address of the zero at the end of
11285 the string, like memchr(), so compute the length by subtracting
11286 the start address. */
11288 emit_insn (gen_subdi3 (out, out, addr));
11290 emit_insn (gen_subsi3 (out, out, addr));
11294 scratch2 = gen_reg_rtx (Pmode);
11295 scratch3 = gen_reg_rtx (Pmode);
11296 scratch4 = force_reg (Pmode, constm1_rtx);
11298 emit_move_insn (scratch3, addr);
11299 eoschar = force_reg (QImode, eoschar);
11301 emit_insn (gen_cld ());
11304 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
11305 align, scratch4, scratch3));
11306 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11307 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11311 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
11312 align, scratch4, scratch3));
11313 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11314 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11320 /* Expand the appropriate insns for doing strlen if not just doing
11323 out = result, initialized with the start address
11324 align_rtx = alignment of the address.
11325 scratch = scratch register, initialized with the startaddress when
11326 not aligned, otherwise undefined
11328 This is just the body. It needs the initializations mentioned above and
11329 some address computing at the end. These things are done in i386.md. */
11332 ix86_expand_strlensi_unroll_1 (rtx out, rtx align_rtx)
11336 rtx align_2_label = NULL_RTX;
11337 rtx align_3_label = NULL_RTX;
11338 rtx align_4_label = gen_label_rtx ();
11339 rtx end_0_label = gen_label_rtx ();
11341 rtx tmpreg = gen_reg_rtx (SImode);
11342 rtx scratch = gen_reg_rtx (SImode);
11346 if (GET_CODE (align_rtx) == CONST_INT)
11347 align = INTVAL (align_rtx);
11349 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11351 /* Is there a known alignment and is it less than 4? */
11354 rtx scratch1 = gen_reg_rtx (Pmode);
11355 emit_move_insn (scratch1, out);
11356 /* Is there a known alignment and is it not 2? */
11359 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11360 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11362 /* Leave just the 3 lower bits. */
11363 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11364 NULL_RTX, 0, OPTAB_WIDEN);
11366 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11367 Pmode, 1, align_4_label);
11368 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
11369 Pmode, 1, align_2_label);
11370 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
11371 Pmode, 1, align_3_label);
11375 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11376 check if is aligned to 4 - byte. */
11378 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
11379 NULL_RTX, 0, OPTAB_WIDEN);
11381 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11382 Pmode, 1, align_4_label);
11385 mem = gen_rtx_MEM (QImode, out);
11387 /* Now compare the bytes. */
11389 /* Compare the first n unaligned byte on a byte per byte basis. */
11390 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11391 QImode, 1, end_0_label);
11393 /* Increment the address. */
11395 emit_insn (gen_adddi3 (out, out, const1_rtx));
11397 emit_insn (gen_addsi3 (out, out, const1_rtx));
11399 /* Not needed with an alignment of 2 */
11402 emit_label (align_2_label);
11404 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11408 emit_insn (gen_adddi3 (out, out, const1_rtx));
11410 emit_insn (gen_addsi3 (out, out, const1_rtx));
11412 emit_label (align_3_label);
11415 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11419 emit_insn (gen_adddi3 (out, out, const1_rtx));
11421 emit_insn (gen_addsi3 (out, out, const1_rtx));
11424 /* Generate loop to check 4 bytes at a time. It is not a good idea to
11425 align this loop. It gives only huge programs, but does not help to
11427 emit_label (align_4_label);
11429 mem = gen_rtx_MEM (SImode, out);
11430 emit_move_insn (scratch, mem);
11432 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11434 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11436 /* This formula yields a nonzero result iff one of the bytes is zero.
11437 This saves three branches inside loop and many cycles. */
11439 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11440 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11441 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11442 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11443 gen_int_mode (0x80808080, SImode)));
11444 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11449 rtx reg = gen_reg_rtx (SImode);
11450 rtx reg2 = gen_reg_rtx (Pmode);
11451 emit_move_insn (reg, tmpreg);
11452 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11454 /* If zero is not in the first two bytes, move two bytes forward. */
11455 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11456 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11457 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11458 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11459 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11462 /* Emit lea manually to avoid clobbering of flags. */
11463 emit_insn (gen_rtx_SET (SImode, reg2,
11464 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
11466 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11467 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11468 emit_insn (gen_rtx_SET (VOIDmode, out,
11469 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11476 rtx end_2_label = gen_label_rtx ();
11477 /* Is zero in the first two bytes? */
11479 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11480 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11481 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11482 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11483 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11485 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11486 JUMP_LABEL (tmp) = end_2_label;
11488 /* Not in the first two. Move two bytes forward. */
11489 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11491 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
11493 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
11495 emit_label (end_2_label);
11499 /* Avoid branch in fixing the byte. */
11500 tmpreg = gen_lowpart (QImode, tmpreg);
11501 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11502 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11504 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11506 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
11508 emit_label (end_0_label);
11512 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, rtx callarg2,
11513 rtx pop, int sibcall)
11515 rtx use = NULL, call;
11517 if (pop == const0_rtx)
11519 if (TARGET_64BIT && pop)
11523 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11524 fnaddr = machopic_indirect_call_target (fnaddr);
11526 /* Static functions and indirect calls don't need the pic register. */
11527 if (! TARGET_64BIT && flag_pic
11528 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11529 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
11530 use_reg (&use, pic_offset_table_rtx);
11532 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11534 rtx al = gen_rtx_REG (QImode, 0);
11535 emit_move_insn (al, callarg2);
11536 use_reg (&use, al);
11538 #endif /* TARGET_MACHO */
11540 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11542 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11543 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11545 if (sibcall && TARGET_64BIT
11546 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11549 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11550 fnaddr = gen_rtx_REG (Pmode, 40);
11551 emit_move_insn (fnaddr, addr);
11552 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11555 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11557 call = gen_rtx_SET (VOIDmode, retval, call);
11560 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11561 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11562 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11565 call = emit_call_insn (call);
11567 CALL_INSN_FUNCTION_USAGE (call) = use;
11571 /* Clear stack slot assignments remembered from previous functions.
11572 This is called from INIT_EXPANDERS once before RTL is emitted for each
11575 static struct machine_function *
11576 ix86_init_machine_status (void)
11578 struct machine_function *f;
11580 f = ggc_alloc_cleared (sizeof (struct machine_function));
11581 f->use_fast_prologue_epilogue_nregs = -1;
11586 /* Return a MEM corresponding to a stack slot with mode MODE.
11587 Allocate a new slot if necessary.
11589 The RTL for a function can have several slots available: N is
11590 which slot to use. */
11593 assign_386_stack_local (enum machine_mode mode, int n)
11595 struct stack_local_entry *s;
11597 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11600 for (s = ix86_stack_locals; s; s = s->next)
11601 if (s->mode == mode && s->n == n)
11604 s = (struct stack_local_entry *)
11605 ggc_alloc (sizeof (struct stack_local_entry));
11608 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11610 s->next = ix86_stack_locals;
11611 ix86_stack_locals = s;
11615 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11617 static GTY(()) rtx ix86_tls_symbol;
11619 ix86_tls_get_addr (void)
11622 if (!ix86_tls_symbol)
11624 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11625 (TARGET_GNU_TLS && !TARGET_64BIT)
11626 ? "___tls_get_addr"
11627 : "__tls_get_addr");
11630 return ix86_tls_symbol;
11633 /* Calculate the length of the memory address in the instruction
11634 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11637 memory_address_length (rtx addr)
11639 struct ix86_address parts;
11640 rtx base, index, disp;
11643 if (GET_CODE (addr) == PRE_DEC
11644 || GET_CODE (addr) == POST_INC
11645 || GET_CODE (addr) == PRE_MODIFY
11646 || GET_CODE (addr) == POST_MODIFY)
11649 if (! ix86_decompose_address (addr, &parts))
11653 index = parts.index;
11658 - esp as the base always wants an index,
11659 - ebp as the base always wants a displacement. */
11661 /* Register Indirect. */
11662 if (base && !index && !disp)
11664 /* esp (for its index) and ebp (for its displacement) need
11665 the two-byte modrm form. */
11666 if (addr == stack_pointer_rtx
11667 || addr == arg_pointer_rtx
11668 || addr == frame_pointer_rtx
11669 || addr == hard_frame_pointer_rtx)
11673 /* Direct Addressing. */
11674 else if (disp && !base && !index)
11679 /* Find the length of the displacement constant. */
11682 if (GET_CODE (disp) == CONST_INT
11683 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
11689 /* ebp always wants a displacement. */
11690 else if (base == hard_frame_pointer_rtx)
11693 /* An index requires the two-byte modrm form... */
11695 /* ...like esp, which always wants an index. */
11696 || base == stack_pointer_rtx
11697 || base == arg_pointer_rtx
11698 || base == frame_pointer_rtx)
11705 /* Compute default value for "length_immediate" attribute. When SHORTFORM
11706 is set, expect that insn have 8bit immediate alternative. */
11708 ix86_attr_length_immediate_default (rtx insn, int shortform)
11712 extract_insn_cached (insn);
11713 for (i = recog_data.n_operands - 1; i >= 0; --i)
11714 if (CONSTANT_P (recog_data.operand[i]))
11719 && GET_CODE (recog_data.operand[i]) == CONST_INT
11720 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11724 switch (get_attr_mode (insn))
11735 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
11740 fatal_insn ("unknown insn mode", insn);
11746 /* Compute default value for "length_address" attribute. */
11748 ix86_attr_length_address_default (rtx insn)
11752 if (get_attr_type (insn) == TYPE_LEA)
11754 rtx set = PATTERN (insn);
11755 if (GET_CODE (set) == SET)
11757 else if (GET_CODE (set) == PARALLEL
11758 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
11759 set = XVECEXP (set, 0, 0);
11762 #ifdef ENABLE_CHECKING
11768 return memory_address_length (SET_SRC (set));
11771 extract_insn_cached (insn);
11772 for (i = recog_data.n_operands - 1; i >= 0; --i)
11773 if (GET_CODE (recog_data.operand[i]) == MEM)
11775 return memory_address_length (XEXP (recog_data.operand[i], 0));
11781 /* Return the maximum number of instructions a cpu can issue. */
11784 ix86_issue_rate (void)
11788 case PROCESSOR_PENTIUM:
11792 case PROCESSOR_PENTIUMPRO:
11793 case PROCESSOR_PENTIUM4:
11794 case PROCESSOR_ATHLON:
11803 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
11804 by DEP_INSN and nothing set by DEP_INSN. */
11807 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
11811 /* Simplify the test for uninteresting insns. */
11812 if (insn_type != TYPE_SETCC
11813 && insn_type != TYPE_ICMOV
11814 && insn_type != TYPE_FCMOV
11815 && insn_type != TYPE_IBR)
11818 if ((set = single_set (dep_insn)) != 0)
11820 set = SET_DEST (set);
11823 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
11824 && XVECLEN (PATTERN (dep_insn), 0) == 2
11825 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
11826 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
11828 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11829 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11834 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11837 /* This test is true if the dependent insn reads the flags but
11838 not any other potentially set register. */
11839 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11842 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11848 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11849 address with operands set by DEP_INSN. */
11852 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
11856 if (insn_type == TYPE_LEA
11859 addr = PATTERN (insn);
11860 if (GET_CODE (addr) == SET)
11862 else if (GET_CODE (addr) == PARALLEL
11863 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11864 addr = XVECEXP (addr, 0, 0);
11867 addr = SET_SRC (addr);
11872 extract_insn_cached (insn);
11873 for (i = recog_data.n_operands - 1; i >= 0; --i)
11874 if (GET_CODE (recog_data.operand[i]) == MEM)
11876 addr = XEXP (recog_data.operand[i], 0);
11883 return modified_in_p (addr, dep_insn);
11887 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
11889 enum attr_type insn_type, dep_insn_type;
11890 enum attr_memory memory, dep_memory;
11892 int dep_insn_code_number;
11894 /* Anti and output dependencies have zero cost on all CPUs. */
11895 if (REG_NOTE_KIND (link) != 0)
11898 dep_insn_code_number = recog_memoized (dep_insn);
11900 /* If we can't recognize the insns, we can't really do anything. */
11901 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
11904 insn_type = get_attr_type (insn);
11905 dep_insn_type = get_attr_type (dep_insn);
11909 case PROCESSOR_PENTIUM:
11910 /* Address Generation Interlock adds a cycle of latency. */
11911 if (ix86_agi_dependant (insn, dep_insn, insn_type))
11914 /* ??? Compares pair with jump/setcc. */
11915 if (ix86_flags_dependant (insn, dep_insn, insn_type))
11918 /* Floating point stores require value to be ready one cycle earlier. */
11919 if (insn_type == TYPE_FMOV
11920 && get_attr_memory (insn) == MEMORY_STORE
11921 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11925 case PROCESSOR_PENTIUMPRO:
11926 memory = get_attr_memory (insn);
11927 dep_memory = get_attr_memory (dep_insn);
11929 /* Since we can't represent delayed latencies of load+operation,
11930 increase the cost here for non-imov insns. */
11931 if (dep_insn_type != TYPE_IMOV
11932 && dep_insn_type != TYPE_FMOV
11933 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
11936 /* INT->FP conversion is expensive. */
11937 if (get_attr_fp_int_src (dep_insn))
11940 /* There is one cycle extra latency between an FP op and a store. */
11941 if (insn_type == TYPE_FMOV
11942 && (set = single_set (dep_insn)) != NULL_RTX
11943 && (set2 = single_set (insn)) != NULL_RTX
11944 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11945 && GET_CODE (SET_DEST (set2)) == MEM)
11948 /* Show ability of reorder buffer to hide latency of load by executing
11949 in parallel with previous instruction in case
11950 previous instruction is not needed to compute the address. */
11951 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11952 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11954 /* Claim moves to take one cycle, as core can issue one load
11955 at time and the next load can start cycle later. */
11956 if (dep_insn_type == TYPE_IMOV
11957 || dep_insn_type == TYPE_FMOV)
11965 memory = get_attr_memory (insn);
11966 dep_memory = get_attr_memory (dep_insn);
11967 /* The esp dependency is resolved before the instruction is really
11969 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11970 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11973 /* Since we can't represent delayed latencies of load+operation,
11974 increase the cost here for non-imov insns. */
11975 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11976 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
11978 /* INT->FP conversion is expensive. */
11979 if (get_attr_fp_int_src (dep_insn))
11982 /* Show ability of reorder buffer to hide latency of load by executing
11983 in parallel with previous instruction in case
11984 previous instruction is not needed to compute the address. */
11985 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11986 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11988 /* Claim moves to take one cycle, as core can issue one load
11989 at time and the next load can start cycle later. */
11990 if (dep_insn_type == TYPE_IMOV
11991 || dep_insn_type == TYPE_FMOV)
12000 case PROCESSOR_ATHLON:
12002 memory = get_attr_memory (insn);
12003 dep_memory = get_attr_memory (dep_insn);
12005 /* Show ability of reorder buffer to hide latency of load by executing
12006 in parallel with previous instruction in case
12007 previous instruction is not needed to compute the address. */
12008 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12009 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12011 enum attr_unit unit = get_attr_unit (insn);
12014 /* Because of the difference between the length of integer and
12015 floating unit pipeline preparation stages, the memory operands
12016 for floating point are cheaper.
12018 ??? For Athlon it the difference is most probably 2. */
12019 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
12022 loadcost = TARGET_ATHLON ? 2 : 0;
12024 if (cost >= loadcost)
12039 struct ppro_sched_data
12042 int issued_this_cycle;
12046 static enum attr_ppro_uops
12047 ix86_safe_ppro_uops (rtx insn)
12049 if (recog_memoized (insn) >= 0)
12050 return get_attr_ppro_uops (insn);
12052 return PPRO_UOPS_MANY;
12056 ix86_dump_ppro_packet (FILE *dump)
12058 if (ix86_sched_data.ppro.decode[0])
12060 fprintf (dump, "PPRO packet: %d",
12061 INSN_UID (ix86_sched_data.ppro.decode[0]));
12062 if (ix86_sched_data.ppro.decode[1])
12063 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
12064 if (ix86_sched_data.ppro.decode[2])
12065 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
12066 fputc ('\n', dump);
12070 /* We're beginning a new block. Initialize data structures as necessary. */
12073 ix86_sched_init (FILE *dump ATTRIBUTE_UNUSED,
12074 int sched_verbose ATTRIBUTE_UNUSED,
12075 int veclen ATTRIBUTE_UNUSED)
12077 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
12080 /* Shift INSN to SLOT, and shift everything else down. */
12083 ix86_reorder_insn (rtx *insnp, rtx *slot)
12089 insnp[0] = insnp[1];
12090 while (++insnp != slot);
12096 ix86_sched_reorder_ppro (rtx *ready, rtx *e_ready)
12099 enum attr_ppro_uops cur_uops;
12100 int issued_this_cycle;
12104 /* At this point .ppro.decode contains the state of the three
12105 decoders from last "cycle". That is, those insns that were
12106 actually independent. But here we're scheduling for the
12107 decoder, and we may find things that are decodable in the
12110 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
12111 issued_this_cycle = 0;
12114 cur_uops = ix86_safe_ppro_uops (*insnp);
12116 /* If the decoders are empty, and we've a complex insn at the
12117 head of the priority queue, let it issue without complaint. */
12118 if (decode[0] == NULL)
12120 if (cur_uops == PPRO_UOPS_MANY)
12122 decode[0] = *insnp;
12126 /* Otherwise, search for a 2-4 uop unsn to issue. */
12127 while (cur_uops != PPRO_UOPS_FEW)
12129 if (insnp == ready)
12131 cur_uops = ix86_safe_ppro_uops (*--insnp);
12134 /* If so, move it to the head of the line. */
12135 if (cur_uops == PPRO_UOPS_FEW)
12136 ix86_reorder_insn (insnp, e_ready);
12138 /* Issue the head of the queue. */
12139 issued_this_cycle = 1;
12140 decode[0] = *e_ready--;
12143 /* Look for simple insns to fill in the other two slots. */
12144 for (i = 1; i < 3; ++i)
12145 if (decode[i] == NULL)
12147 if (ready > e_ready)
12151 cur_uops = ix86_safe_ppro_uops (*insnp);
12152 while (cur_uops != PPRO_UOPS_ONE)
12154 if (insnp == ready)
12156 cur_uops = ix86_safe_ppro_uops (*--insnp);
12159 /* Found one. Move it to the head of the queue and issue it. */
12160 if (cur_uops == PPRO_UOPS_ONE)
12162 ix86_reorder_insn (insnp, e_ready);
12163 decode[i] = *e_ready--;
12164 issued_this_cycle++;
12168 /* ??? Didn't find one. Ideally, here we would do a lazy split
12169 of 2-uop insns, issue one and queue the other. */
12173 if (issued_this_cycle == 0)
12174 issued_this_cycle = 1;
12175 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
12178 /* We are about to being issuing insns for this clock cycle.
12179 Override the default sort algorithm to better slot instructions. */
12181 ix86_sched_reorder (FILE *dump ATTRIBUTE_UNUSED,
12182 int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
12183 int *n_readyp, int clock_var ATTRIBUTE_UNUSED)
12185 int n_ready = *n_readyp;
12186 rtx *e_ready = ready + n_ready - 1;
12188 /* Make sure to go ahead and initialize key items in
12189 ix86_sched_data if we are not going to bother trying to
12190 reorder the ready queue. */
12193 ix86_sched_data.ppro.issued_this_cycle = 1;
12202 case PROCESSOR_PENTIUMPRO:
12203 ix86_sched_reorder_ppro (ready, e_ready);
12208 return ix86_issue_rate ();
12211 /* We are about to issue INSN. Return the number of insns left on the
12212 ready queue that can be issued this cycle. */
12215 ix86_variable_issue (FILE *dump, int sched_verbose, rtx insn,
12216 int can_issue_more)
12222 return can_issue_more - 1;
12224 case PROCESSOR_PENTIUMPRO:
12226 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
12228 if (uops == PPRO_UOPS_MANY)
12231 ix86_dump_ppro_packet (dump);
12232 ix86_sched_data.ppro.decode[0] = insn;
12233 ix86_sched_data.ppro.decode[1] = NULL;
12234 ix86_sched_data.ppro.decode[2] = NULL;
12236 ix86_dump_ppro_packet (dump);
12237 ix86_sched_data.ppro.decode[0] = NULL;
12239 else if (uops == PPRO_UOPS_FEW)
12242 ix86_dump_ppro_packet (dump);
12243 ix86_sched_data.ppro.decode[0] = insn;
12244 ix86_sched_data.ppro.decode[1] = NULL;
12245 ix86_sched_data.ppro.decode[2] = NULL;
12249 for (i = 0; i < 3; ++i)
12250 if (ix86_sched_data.ppro.decode[i] == NULL)
12252 ix86_sched_data.ppro.decode[i] = insn;
12260 ix86_dump_ppro_packet (dump);
12261 ix86_sched_data.ppro.decode[0] = NULL;
12262 ix86_sched_data.ppro.decode[1] = NULL;
12263 ix86_sched_data.ppro.decode[2] = NULL;
12267 return --ix86_sched_data.ppro.issued_this_cycle;
12272 ia32_use_dfa_pipeline_interface (void)
12274 if (TARGET_PENTIUM || TARGET_ATHLON_K8)
12279 /* How many alternative schedules to try. This should be as wide as the
12280 scheduling freedom in the DFA, but no wider. Making this value too
12281 large results extra work for the scheduler. */
12284 ia32_multipass_dfa_lookahead (void)
12286 if (ix86_tune == PROCESSOR_PENTIUM)
12293 /* Walk through INSNS and look for MEM references whose address is DSTREG or
12294 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
12298 ix86_set_move_mem_attrs (rtx insns, rtx dstref, rtx srcref, rtx dstreg,
12303 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
12305 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
12309 /* Subroutine of above to actually do the updating by recursively walking
12313 ix86_set_move_mem_attrs_1 (rtx x, rtx dstref, rtx srcref, rtx dstreg,
12316 enum rtx_code code = GET_CODE (x);
12317 const char *format_ptr = GET_RTX_FORMAT (code);
12320 if (code == MEM && XEXP (x, 0) == dstreg)
12321 MEM_COPY_ATTRIBUTES (x, dstref);
12322 else if (code == MEM && XEXP (x, 0) == srcreg)
12323 MEM_COPY_ATTRIBUTES (x, srcref);
12325 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
12327 if (*format_ptr == 'e')
12328 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
12330 else if (*format_ptr == 'E')
12331 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12332 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
12337 /* Compute the alignment given to a constant that is being placed in memory.
12338 EXP is the constant and ALIGN is the alignment that the object would
12340 The value of this function is used instead of that alignment to align
12344 ix86_constant_alignment (tree exp, int align)
12346 if (TREE_CODE (exp) == REAL_CST)
12348 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12350 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12353 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
12360 /* Compute the alignment for a static variable.
12361 TYPE is the data type, and ALIGN is the alignment that
12362 the object would ordinarily have. The value of this function is used
12363 instead of that alignment to align the object. */
12366 ix86_data_alignment (tree type, int align)
12368 if (AGGREGATE_TYPE_P (type)
12369 && TYPE_SIZE (type)
12370 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12371 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12372 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12375 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12376 to 16byte boundary. */
12379 if (AGGREGATE_TYPE_P (type)
12380 && TYPE_SIZE (type)
12381 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12382 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12383 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12387 if (TREE_CODE (type) == ARRAY_TYPE)
12389 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12391 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12394 else if (TREE_CODE (type) == COMPLEX_TYPE)
12397 if (TYPE_MODE (type) == DCmode && align < 64)
12399 if (TYPE_MODE (type) == XCmode && align < 128)
12402 else if ((TREE_CODE (type) == RECORD_TYPE
12403 || TREE_CODE (type) == UNION_TYPE
12404 || TREE_CODE (type) == QUAL_UNION_TYPE)
12405 && TYPE_FIELDS (type))
12407 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12409 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12412 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12413 || TREE_CODE (type) == INTEGER_TYPE)
12415 if (TYPE_MODE (type) == DFmode && align < 64)
12417 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12424 /* Compute the alignment for a local variable.
12425 TYPE is the data type, and ALIGN is the alignment that
12426 the object would ordinarily have. The value of this macro is used
12427 instead of that alignment to align the object. */
12430 ix86_local_alignment (tree type, int align)
12432 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12433 to 16byte boundary. */
12436 if (AGGREGATE_TYPE_P (type)
12437 && TYPE_SIZE (type)
12438 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12439 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12440 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12443 if (TREE_CODE (type) == ARRAY_TYPE)
12445 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12447 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12450 else if (TREE_CODE (type) == COMPLEX_TYPE)
12452 if (TYPE_MODE (type) == DCmode && align < 64)
12454 if (TYPE_MODE (type) == XCmode && align < 128)
12457 else if ((TREE_CODE (type) == RECORD_TYPE
12458 || TREE_CODE (type) == UNION_TYPE
12459 || TREE_CODE (type) == QUAL_UNION_TYPE)
12460 && TYPE_FIELDS (type))
12462 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12464 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12467 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12468 || TREE_CODE (type) == INTEGER_TYPE)
12471 if (TYPE_MODE (type) == DFmode && align < 64)
12473 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12479 /* Emit RTL insns to initialize the variable parts of a trampoline.
12480 FNADDR is an RTX for the address of the function's pure code.
12481 CXT is an RTX for the static chain value for the function. */
12483 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
12487 /* Compute offset from the end of the jmp to the target function. */
12488 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12489 plus_constant (tramp, 10),
12490 NULL_RTX, 1, OPTAB_DIRECT);
12491 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12492 gen_int_mode (0xb9, QImode));
12493 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12494 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12495 gen_int_mode (0xe9, QImode));
12496 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12501 /* Try to load address using shorter movl instead of movabs.
12502 We may want to support movq for kernel mode, but kernel does not use
12503 trampolines at the moment. */
12504 if (x86_64_zero_extended_value (fnaddr))
12506 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12507 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12508 gen_int_mode (0xbb41, HImode));
12509 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12510 gen_lowpart (SImode, fnaddr));
12515 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12516 gen_int_mode (0xbb49, HImode));
12517 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12521 /* Load static chain using movabs to r10. */
12522 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12523 gen_int_mode (0xba49, HImode));
12524 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12527 /* Jump to the r11 */
12528 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12529 gen_int_mode (0xff49, HImode));
12530 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12531 gen_int_mode (0xe3, QImode));
12533 if (offset > TRAMPOLINE_SIZE)
12537 #ifdef TRANSFER_FROM_TRAMPOLINE
12538 emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
12539 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12543 #define def_builtin(MASK, NAME, TYPE, CODE) \
12545 if ((MASK) & target_flags \
12546 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
12547 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12548 NULL, NULL_TREE); \
12551 struct builtin_description
12553 const unsigned int mask;
12554 const enum insn_code icode;
12555 const char *const name;
12556 const enum ix86_builtins code;
12557 const enum rtx_code comparison;
12558 const unsigned int flag;
12561 static const struct builtin_description bdesc_comi[] =
12563 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12564 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12565 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12566 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12567 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12568 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12569 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12570 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12571 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12572 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12573 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12574 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12575 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12576 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12577 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12578 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12579 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12580 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12581 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12582 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12583 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12584 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12585 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12586 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12589 static const struct builtin_description bdesc_2arg[] =
12592 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12593 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12594 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12595 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12596 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12597 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12598 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12599 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12601 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12602 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12603 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12604 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12605 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12606 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12607 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12608 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12609 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12610 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12611 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12612 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12613 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12614 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12615 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12616 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12617 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12618 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12619 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12620 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12622 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12623 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12624 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12625 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12627 { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12628 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12629 { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12630 { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12632 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12633 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12634 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12635 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12636 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12639 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12640 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12641 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12642 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
12643 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12644 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12645 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12646 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
12648 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12649 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12650 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12651 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12652 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12653 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12654 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12655 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12657 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12658 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12659 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12661 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12662 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12663 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12664 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12666 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12667 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12669 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12670 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12671 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12672 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12673 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12674 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12676 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12677 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12678 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12679 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12681 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12682 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12683 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12684 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12685 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12686 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12689 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12690 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12691 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12693 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12694 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12695 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
12697 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12698 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12699 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12700 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12701 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12702 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12704 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12705 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12706 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12707 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12708 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12709 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12711 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12712 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12713 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12714 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12716 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12717 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12720 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12721 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12722 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12723 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12724 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12725 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12726 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12727 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12729 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12730 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12731 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12732 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12733 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12734 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12735 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12736 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12737 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12738 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12739 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12740 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12741 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12742 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12743 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12744 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12745 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12746 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12747 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12748 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12750 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12751 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12752 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12753 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12755 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12756 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12757 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12758 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12760 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12761 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12762 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12765 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12766 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12767 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12768 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12769 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12770 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12771 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12772 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12774 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12775 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12776 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12777 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12778 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12779 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12780 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12781 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12783 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12784 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12785 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
12786 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
12788 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12789 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12790 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12791 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
12793 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12794 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12796 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12797 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12798 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12799 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12800 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12801 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12803 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12804 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12805 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12806 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12808 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12809 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12810 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
12811 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
12812 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12813 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12814 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12815 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
12817 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12818 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12819 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12821 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12822 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12824 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12825 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12826 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12827 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12828 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12829 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12831 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12832 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12833 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12834 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12835 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12836 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12838 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12839 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12840 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12841 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12843 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12845 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12846 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
12847 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12848 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
12851 { MASK_PNI, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
12852 { MASK_PNI, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
12853 { MASK_PNI, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
12854 { MASK_PNI, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
12855 { MASK_PNI, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
12856 { MASK_PNI, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
12859 static const struct builtin_description bdesc_1arg[] =
12861 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12862 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12864 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12865 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12866 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12868 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12869 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12870 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
12871 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12872 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12873 { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
12875 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12876 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12877 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
12878 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
12880 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12882 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12883 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
12885 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12886 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12887 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12888 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12889 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
12891 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
12893 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12894 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12895 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
12896 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
12898 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12899 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
12900 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
12902 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
12905 { MASK_PNI, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
12906 { MASK_PNI, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
12907 { MASK_PNI, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
12911 ix86_init_builtins (void)
12914 ix86_init_mmx_sse_builtins ();
12917 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
12918 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
12921 ix86_init_mmx_sse_builtins (void)
12923 const struct builtin_description * d;
12926 tree pchar_type_node = build_pointer_type (char_type_node);
12927 tree pcchar_type_node = build_pointer_type (
12928 build_type_variant (char_type_node, 1, 0));
12929 tree pfloat_type_node = build_pointer_type (float_type_node);
12930 tree pcfloat_type_node = build_pointer_type (
12931 build_type_variant (float_type_node, 1, 0));
12932 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
12933 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
12934 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
12937 tree int_ftype_v4sf_v4sf
12938 = build_function_type_list (integer_type_node,
12939 V4SF_type_node, V4SF_type_node, NULL_TREE);
12940 tree v4si_ftype_v4sf_v4sf
12941 = build_function_type_list (V4SI_type_node,
12942 V4SF_type_node, V4SF_type_node, NULL_TREE);
12943 /* MMX/SSE/integer conversions. */
12944 tree int_ftype_v4sf
12945 = build_function_type_list (integer_type_node,
12946 V4SF_type_node, NULL_TREE);
12947 tree int64_ftype_v4sf
12948 = build_function_type_list (long_long_integer_type_node,
12949 V4SF_type_node, NULL_TREE);
12950 tree int_ftype_v8qi
12951 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
12952 tree v4sf_ftype_v4sf_int
12953 = build_function_type_list (V4SF_type_node,
12954 V4SF_type_node, integer_type_node, NULL_TREE);
12955 tree v4sf_ftype_v4sf_int64
12956 = build_function_type_list (V4SF_type_node,
12957 V4SF_type_node, long_long_integer_type_node,
12959 tree v4sf_ftype_v4sf_v2si
12960 = build_function_type_list (V4SF_type_node,
12961 V4SF_type_node, V2SI_type_node, NULL_TREE);
12962 tree int_ftype_v4hi_int
12963 = build_function_type_list (integer_type_node,
12964 V4HI_type_node, integer_type_node, NULL_TREE);
12965 tree v4hi_ftype_v4hi_int_int
12966 = build_function_type_list (V4HI_type_node, V4HI_type_node,
12967 integer_type_node, integer_type_node,
12969 /* Miscellaneous. */
12970 tree v8qi_ftype_v4hi_v4hi
12971 = build_function_type_list (V8QI_type_node,
12972 V4HI_type_node, V4HI_type_node, NULL_TREE);
12973 tree v4hi_ftype_v2si_v2si
12974 = build_function_type_list (V4HI_type_node,
12975 V2SI_type_node, V2SI_type_node, NULL_TREE);
12976 tree v4sf_ftype_v4sf_v4sf_int
12977 = build_function_type_list (V4SF_type_node,
12978 V4SF_type_node, V4SF_type_node,
12979 integer_type_node, NULL_TREE);
12980 tree v2si_ftype_v4hi_v4hi
12981 = build_function_type_list (V2SI_type_node,
12982 V4HI_type_node, V4HI_type_node, NULL_TREE);
12983 tree v4hi_ftype_v4hi_int
12984 = build_function_type_list (V4HI_type_node,
12985 V4HI_type_node, integer_type_node, NULL_TREE);
12986 tree v4hi_ftype_v4hi_di
12987 = build_function_type_list (V4HI_type_node,
12988 V4HI_type_node, long_long_unsigned_type_node,
12990 tree v2si_ftype_v2si_di
12991 = build_function_type_list (V2SI_type_node,
12992 V2SI_type_node, long_long_unsigned_type_node,
12994 tree void_ftype_void
12995 = build_function_type (void_type_node, void_list_node);
12996 tree void_ftype_unsigned
12997 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
12998 tree void_ftype_unsigned_unsigned
12999 = build_function_type_list (void_type_node, unsigned_type_node,
13000 unsigned_type_node, NULL_TREE);
13001 tree void_ftype_pcvoid_unsigned_unsigned
13002 = build_function_type_list (void_type_node, const_ptr_type_node,
13003 unsigned_type_node, unsigned_type_node,
13005 tree unsigned_ftype_void
13006 = build_function_type (unsigned_type_node, void_list_node);
13008 = build_function_type (long_long_unsigned_type_node, void_list_node);
13009 tree v4sf_ftype_void
13010 = build_function_type (V4SF_type_node, void_list_node);
13011 tree v2si_ftype_v4sf
13012 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
13013 /* Loads/stores. */
13014 tree void_ftype_v8qi_v8qi_pchar
13015 = build_function_type_list (void_type_node,
13016 V8QI_type_node, V8QI_type_node,
13017 pchar_type_node, NULL_TREE);
13018 tree v4sf_ftype_pcfloat
13019 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
13020 /* @@@ the type is bogus */
13021 tree v4sf_ftype_v4sf_pv2si
13022 = build_function_type_list (V4SF_type_node,
13023 V4SF_type_node, pv2si_type_node, NULL_TREE);
13024 tree void_ftype_pv2si_v4sf
13025 = build_function_type_list (void_type_node,
13026 pv2si_type_node, V4SF_type_node, NULL_TREE);
13027 tree void_ftype_pfloat_v4sf
13028 = build_function_type_list (void_type_node,
13029 pfloat_type_node, V4SF_type_node, NULL_TREE);
13030 tree void_ftype_pdi_di
13031 = build_function_type_list (void_type_node,
13032 pdi_type_node, long_long_unsigned_type_node,
13034 tree void_ftype_pv2di_v2di
13035 = build_function_type_list (void_type_node,
13036 pv2di_type_node, V2DI_type_node, NULL_TREE);
13037 /* Normal vector unops. */
13038 tree v4sf_ftype_v4sf
13039 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
13041 /* Normal vector binops. */
13042 tree v4sf_ftype_v4sf_v4sf
13043 = build_function_type_list (V4SF_type_node,
13044 V4SF_type_node, V4SF_type_node, NULL_TREE);
13045 tree v8qi_ftype_v8qi_v8qi
13046 = build_function_type_list (V8QI_type_node,
13047 V8QI_type_node, V8QI_type_node, NULL_TREE);
13048 tree v4hi_ftype_v4hi_v4hi
13049 = build_function_type_list (V4HI_type_node,
13050 V4HI_type_node, V4HI_type_node, NULL_TREE);
13051 tree v2si_ftype_v2si_v2si
13052 = build_function_type_list (V2SI_type_node,
13053 V2SI_type_node, V2SI_type_node, NULL_TREE);
13054 tree di_ftype_di_di
13055 = build_function_type_list (long_long_unsigned_type_node,
13056 long_long_unsigned_type_node,
13057 long_long_unsigned_type_node, NULL_TREE);
13059 tree v2si_ftype_v2sf
13060 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
13061 tree v2sf_ftype_v2si
13062 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
13063 tree v2si_ftype_v2si
13064 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
13065 tree v2sf_ftype_v2sf
13066 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
13067 tree v2sf_ftype_v2sf_v2sf
13068 = build_function_type_list (V2SF_type_node,
13069 V2SF_type_node, V2SF_type_node, NULL_TREE);
13070 tree v2si_ftype_v2sf_v2sf
13071 = build_function_type_list (V2SI_type_node,
13072 V2SF_type_node, V2SF_type_node, NULL_TREE);
13073 tree pint_type_node = build_pointer_type (integer_type_node);
13074 tree pcint_type_node = build_pointer_type (
13075 build_type_variant (integer_type_node, 1, 0));
13076 tree pdouble_type_node = build_pointer_type (double_type_node);
13077 tree pcdouble_type_node = build_pointer_type (
13078 build_type_variant (double_type_node, 1, 0));
13079 tree int_ftype_v2df_v2df
13080 = build_function_type_list (integer_type_node,
13081 V2DF_type_node, V2DF_type_node, NULL_TREE);
13084 = build_function_type (intTI_type_node, void_list_node);
13085 tree v2di_ftype_void
13086 = build_function_type (V2DI_type_node, void_list_node);
13087 tree ti_ftype_ti_ti
13088 = build_function_type_list (intTI_type_node,
13089 intTI_type_node, intTI_type_node, NULL_TREE);
13090 tree void_ftype_pcvoid
13091 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
13093 = build_function_type_list (V2DI_type_node,
13094 long_long_unsigned_type_node, NULL_TREE);
13096 = build_function_type_list (long_long_unsigned_type_node,
13097 V2DI_type_node, NULL_TREE);
13098 tree v4sf_ftype_v4si
13099 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
13100 tree v4si_ftype_v4sf
13101 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
13102 tree v2df_ftype_v4si
13103 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
13104 tree v4si_ftype_v2df
13105 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
13106 tree v2si_ftype_v2df
13107 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
13108 tree v4sf_ftype_v2df
13109 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
13110 tree v2df_ftype_v2si
13111 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
13112 tree v2df_ftype_v4sf
13113 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
13114 tree int_ftype_v2df
13115 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
13116 tree int64_ftype_v2df
13117 = build_function_type_list (long_long_integer_type_node,
13118 V2DF_type_node, NULL_TREE);
13119 tree v2df_ftype_v2df_int
13120 = build_function_type_list (V2DF_type_node,
13121 V2DF_type_node, integer_type_node, NULL_TREE);
13122 tree v2df_ftype_v2df_int64
13123 = build_function_type_list (V2DF_type_node,
13124 V2DF_type_node, long_long_integer_type_node,
13126 tree v4sf_ftype_v4sf_v2df
13127 = build_function_type_list (V4SF_type_node,
13128 V4SF_type_node, V2DF_type_node, NULL_TREE);
13129 tree v2df_ftype_v2df_v4sf
13130 = build_function_type_list (V2DF_type_node,
13131 V2DF_type_node, V4SF_type_node, NULL_TREE);
13132 tree v2df_ftype_v2df_v2df_int
13133 = build_function_type_list (V2DF_type_node,
13134 V2DF_type_node, V2DF_type_node,
13137 tree v2df_ftype_v2df_pv2si
13138 = build_function_type_list (V2DF_type_node,
13139 V2DF_type_node, pv2si_type_node, NULL_TREE);
13140 tree void_ftype_pv2si_v2df
13141 = build_function_type_list (void_type_node,
13142 pv2si_type_node, V2DF_type_node, NULL_TREE);
13143 tree void_ftype_pdouble_v2df
13144 = build_function_type_list (void_type_node,
13145 pdouble_type_node, V2DF_type_node, NULL_TREE);
13146 tree void_ftype_pint_int
13147 = build_function_type_list (void_type_node,
13148 pint_type_node, integer_type_node, NULL_TREE);
13149 tree void_ftype_v16qi_v16qi_pchar
13150 = build_function_type_list (void_type_node,
13151 V16QI_type_node, V16QI_type_node,
13152 pchar_type_node, NULL_TREE);
13153 tree v2df_ftype_pcdouble
13154 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
13155 tree v2df_ftype_v2df_v2df
13156 = build_function_type_list (V2DF_type_node,
13157 V2DF_type_node, V2DF_type_node, NULL_TREE);
13158 tree v16qi_ftype_v16qi_v16qi
13159 = build_function_type_list (V16QI_type_node,
13160 V16QI_type_node, V16QI_type_node, NULL_TREE);
13161 tree v8hi_ftype_v8hi_v8hi
13162 = build_function_type_list (V8HI_type_node,
13163 V8HI_type_node, V8HI_type_node, NULL_TREE);
13164 tree v4si_ftype_v4si_v4si
13165 = build_function_type_list (V4SI_type_node,
13166 V4SI_type_node, V4SI_type_node, NULL_TREE);
13167 tree v2di_ftype_v2di_v2di
13168 = build_function_type_list (V2DI_type_node,
13169 V2DI_type_node, V2DI_type_node, NULL_TREE);
13170 tree v2di_ftype_v2df_v2df
13171 = build_function_type_list (V2DI_type_node,
13172 V2DF_type_node, V2DF_type_node, NULL_TREE);
13173 tree v2df_ftype_v2df
13174 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13175 tree v2df_ftype_double
13176 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
13177 tree v2df_ftype_double_double
13178 = build_function_type_list (V2DF_type_node,
13179 double_type_node, double_type_node, NULL_TREE);
13180 tree int_ftype_v8hi_int
13181 = build_function_type_list (integer_type_node,
13182 V8HI_type_node, integer_type_node, NULL_TREE);
13183 tree v8hi_ftype_v8hi_int_int
13184 = build_function_type_list (V8HI_type_node,
13185 V8HI_type_node, integer_type_node,
13186 integer_type_node, NULL_TREE);
13187 tree v2di_ftype_v2di_int
13188 = build_function_type_list (V2DI_type_node,
13189 V2DI_type_node, integer_type_node, NULL_TREE);
13190 tree v4si_ftype_v4si_int
13191 = build_function_type_list (V4SI_type_node,
13192 V4SI_type_node, integer_type_node, NULL_TREE);
13193 tree v8hi_ftype_v8hi_int
13194 = build_function_type_list (V8HI_type_node,
13195 V8HI_type_node, integer_type_node, NULL_TREE);
13196 tree v8hi_ftype_v8hi_v2di
13197 = build_function_type_list (V8HI_type_node,
13198 V8HI_type_node, V2DI_type_node, NULL_TREE);
13199 tree v4si_ftype_v4si_v2di
13200 = build_function_type_list (V4SI_type_node,
13201 V4SI_type_node, V2DI_type_node, NULL_TREE);
13202 tree v4si_ftype_v8hi_v8hi
13203 = build_function_type_list (V4SI_type_node,
13204 V8HI_type_node, V8HI_type_node, NULL_TREE);
13205 tree di_ftype_v8qi_v8qi
13206 = build_function_type_list (long_long_unsigned_type_node,
13207 V8QI_type_node, V8QI_type_node, NULL_TREE);
13208 tree v2di_ftype_v16qi_v16qi
13209 = build_function_type_list (V2DI_type_node,
13210 V16QI_type_node, V16QI_type_node, NULL_TREE);
13211 tree int_ftype_v16qi
13212 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
13213 tree v16qi_ftype_pcchar
13214 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
13215 tree void_ftype_pchar_v16qi
13216 = build_function_type_list (void_type_node,
13217 pchar_type_node, V16QI_type_node, NULL_TREE);
13218 tree v4si_ftype_pcint
13219 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
13220 tree void_ftype_pcint_v4si
13221 = build_function_type_list (void_type_node,
13222 pcint_type_node, V4SI_type_node, NULL_TREE);
13223 tree v2di_ftype_v2di
13224 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
13226 /* Add all builtins that are more or less simple operations on two
13228 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13230 /* Use one of the operands; the target can have a different mode for
13231 mask-generating compares. */
13232 enum machine_mode mode;
13237 mode = insn_data[d->icode].operand[1].mode;
13242 type = v16qi_ftype_v16qi_v16qi;
13245 type = v8hi_ftype_v8hi_v8hi;
13248 type = v4si_ftype_v4si_v4si;
13251 type = v2di_ftype_v2di_v2di;
13254 type = v2df_ftype_v2df_v2df;
13257 type = ti_ftype_ti_ti;
13260 type = v4sf_ftype_v4sf_v4sf;
13263 type = v8qi_ftype_v8qi_v8qi;
13266 type = v4hi_ftype_v4hi_v4hi;
13269 type = v2si_ftype_v2si_v2si;
13272 type = di_ftype_di_di;
13279 /* Override for comparisons. */
13280 if (d->icode == CODE_FOR_maskcmpv4sf3
13281 || d->icode == CODE_FOR_maskncmpv4sf3
13282 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13283 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
13284 type = v4si_ftype_v4sf_v4sf;
13286 if (d->icode == CODE_FOR_maskcmpv2df3
13287 || d->icode == CODE_FOR_maskncmpv2df3
13288 || d->icode == CODE_FOR_vmmaskcmpv2df3
13289 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13290 type = v2di_ftype_v2df_v2df;
13292 def_builtin (d->mask, d->name, type, d->code);
13295 /* Add the remaining MMX insns with somewhat more complicated types. */
13296 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13297 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13298 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13299 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13300 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13302 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13303 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13304 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13306 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13307 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13309 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13310 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
13312 /* comi/ucomi insns. */
13313 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13314 if (d->mask == MASK_SSE2)
13315 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13317 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
13319 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13320 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13321 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13323 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13324 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13325 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13326 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13327 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13328 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
13329 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13330 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
13331 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13332 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13333 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
13335 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13336 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13338 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13340 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13341 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13342 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13343 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13344 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13345 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13347 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13348 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13349 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13350 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13352 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13353 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13354 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13355 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13357 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13359 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13361 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13362 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13363 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13364 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13365 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13366 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13368 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13370 /* Original 3DNow! */
13371 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13372 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13373 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13374 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13375 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13376 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13377 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13378 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13379 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13380 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13381 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13382 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13383 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13384 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13385 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13386 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13387 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13388 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13389 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13390 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13392 /* 3DNow! extension as used in the Athlon CPU. */
13393 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13394 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13395 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13396 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13397 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13398 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13400 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13403 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13404 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13406 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13407 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13408 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13410 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13411 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13412 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
13413 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13414 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13415 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13417 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13418 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13419 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13420 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13422 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13423 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13424 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13425 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13426 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13428 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13429 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13430 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13431 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13433 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13434 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13436 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13438 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13439 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13441 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13442 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13443 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13444 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13445 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13447 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13449 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13450 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13451 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13452 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
13454 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13455 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13456 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13458 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13459 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
13460 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13461 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13463 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13464 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13465 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13466 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13467 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13468 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13469 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13471 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13472 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13473 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13475 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13476 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13477 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13478 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13479 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13480 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13481 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13483 def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13485 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13486 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13487 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13489 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13490 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13491 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13493 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13494 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13496 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13497 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13498 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13499 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13501 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13502 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13503 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13504 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13506 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13507 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13509 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13511 /* Prescott New Instructions. */
13512 def_builtin (MASK_PNI, "__builtin_ia32_monitor",
13513 void_ftype_pcvoid_unsigned_unsigned,
13514 IX86_BUILTIN_MONITOR);
13515 def_builtin (MASK_PNI, "__builtin_ia32_mwait",
13516 void_ftype_unsigned_unsigned,
13517 IX86_BUILTIN_MWAIT);
13518 def_builtin (MASK_PNI, "__builtin_ia32_movshdup",
13520 IX86_BUILTIN_MOVSHDUP);
13521 def_builtin (MASK_PNI, "__builtin_ia32_movsldup",
13523 IX86_BUILTIN_MOVSLDUP);
13524 def_builtin (MASK_PNI, "__builtin_ia32_lddqu",
13525 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
13526 def_builtin (MASK_PNI, "__builtin_ia32_loadddup",
13527 v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
13528 def_builtin (MASK_PNI, "__builtin_ia32_movddup",
13529 v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
13532 /* Errors in the source file can cause expand_expr to return const0_rtx
13533 where we expect a vector. To avoid crashing, use one of the vector
13534 clear instructions. */
13536 safe_vector_operand (rtx x, enum machine_mode mode)
13538 if (x != const0_rtx)
13540 x = gen_reg_rtx (mode);
13542 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13543 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13544 : gen_rtx_SUBREG (DImode, x, 0)));
13546 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13547 : gen_rtx_SUBREG (V4SFmode, x, 0),
13548 CONST0_RTX (V4SFmode)));
13552 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
13555 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
13558 tree arg0 = TREE_VALUE (arglist);
13559 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13560 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13561 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13562 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13563 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13564 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13566 if (VECTOR_MODE_P (mode0))
13567 op0 = safe_vector_operand (op0, mode0);
13568 if (VECTOR_MODE_P (mode1))
13569 op1 = safe_vector_operand (op1, mode1);
13572 || GET_MODE (target) != tmode
13573 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13574 target = gen_reg_rtx (tmode);
13576 if (GET_MODE (op1) == SImode && mode1 == TImode)
13578 rtx x = gen_reg_rtx (V4SImode);
13579 emit_insn (gen_sse2_loadd (x, op1));
13580 op1 = gen_lowpart (TImode, x);
13583 /* In case the insn wants input operands in modes different from
13584 the result, abort. */
13585 if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
13586 || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
13589 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13590 op0 = copy_to_mode_reg (mode0, op0);
13591 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13592 op1 = copy_to_mode_reg (mode1, op1);
13594 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13595 yet one of the two must not be a memory. This is normally enforced
13596 by expanders, but we didn't bother to create one here. */
13597 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13598 op0 = copy_to_mode_reg (mode0, op0);
13600 pat = GEN_FCN (icode) (target, op0, op1);
13607 /* Subroutine of ix86_expand_builtin to take care of stores. */
13610 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
13613 tree arg0 = TREE_VALUE (arglist);
13614 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13615 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13616 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13617 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13618 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13620 if (VECTOR_MODE_P (mode1))
13621 op1 = safe_vector_operand (op1, mode1);
13623 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13624 op1 = copy_to_mode_reg (mode1, op1);
13626 pat = GEN_FCN (icode) (op0, op1);
13632 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
13635 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
13636 rtx target, int do_load)
13639 tree arg0 = TREE_VALUE (arglist);
13640 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13641 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13642 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13645 || GET_MODE (target) != tmode
13646 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13647 target = gen_reg_rtx (tmode);
13649 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13652 if (VECTOR_MODE_P (mode0))
13653 op0 = safe_vector_operand (op0, mode0);
13655 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13656 op0 = copy_to_mode_reg (mode0, op0);
13659 pat = GEN_FCN (icode) (target, op0);
13666 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13667 sqrtss, rsqrtss, rcpss. */
13670 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
13673 tree arg0 = TREE_VALUE (arglist);
13674 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13675 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13676 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13679 || GET_MODE (target) != tmode
13680 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13681 target = gen_reg_rtx (tmode);
13683 if (VECTOR_MODE_P (mode0))
13684 op0 = safe_vector_operand (op0, mode0);
13686 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13687 op0 = copy_to_mode_reg (mode0, op0);
13690 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13691 op1 = copy_to_mode_reg (mode0, op1);
13693 pat = GEN_FCN (icode) (target, op0, op1);
13700 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13703 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
13707 tree arg0 = TREE_VALUE (arglist);
13708 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13709 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13710 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13712 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13713 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13714 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13715 enum rtx_code comparison = d->comparison;
13717 if (VECTOR_MODE_P (mode0))
13718 op0 = safe_vector_operand (op0, mode0);
13719 if (VECTOR_MODE_P (mode1))
13720 op1 = safe_vector_operand (op1, mode1);
13722 /* Swap operands if we have a comparison that isn't available in
13726 rtx tmp = gen_reg_rtx (mode1);
13727 emit_move_insn (tmp, op1);
13733 || GET_MODE (target) != tmode
13734 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13735 target = gen_reg_rtx (tmode);
13737 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13738 op0 = copy_to_mode_reg (mode0, op0);
13739 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13740 op1 = copy_to_mode_reg (mode1, op1);
13742 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13743 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13750 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
13753 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
13757 tree arg0 = TREE_VALUE (arglist);
13758 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13759 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13760 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13762 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13763 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13764 enum rtx_code comparison = d->comparison;
13766 if (VECTOR_MODE_P (mode0))
13767 op0 = safe_vector_operand (op0, mode0);
13768 if (VECTOR_MODE_P (mode1))
13769 op1 = safe_vector_operand (op1, mode1);
13771 /* Swap operands if we have a comparison that isn't available in
13780 target = gen_reg_rtx (SImode);
13781 emit_move_insn (target, const0_rtx);
13782 target = gen_rtx_SUBREG (QImode, target, 0);
13784 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13785 op0 = copy_to_mode_reg (mode0, op0);
13786 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13787 op1 = copy_to_mode_reg (mode1, op1);
13789 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13790 pat = GEN_FCN (d->icode) (op0, op1);
13794 emit_insn (gen_rtx_SET (VOIDmode,
13795 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13796 gen_rtx_fmt_ee (comparison, QImode,
13800 return SUBREG_REG (target);
13803 /* Expand an expression EXP that calls a built-in function,
13804 with result going to TARGET if that's convenient
13805 (and in mode MODE if that's convenient).
13806 SUBTARGET may be used as the target for computing one of EXP's operands.
13807 IGNORE is nonzero if the value is to be ignored. */
13810 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
13811 enum machine_mode mode ATTRIBUTE_UNUSED,
13812 int ignore ATTRIBUTE_UNUSED)
13814 const struct builtin_description *d;
13816 enum insn_code icode;
13817 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13818 tree arglist = TREE_OPERAND (exp, 1);
13819 tree arg0, arg1, arg2;
13820 rtx op0, op1, op2, pat;
13821 enum machine_mode tmode, mode0, mode1, mode2;
13822 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13826 case IX86_BUILTIN_EMMS:
13827 emit_insn (gen_emms ());
13830 case IX86_BUILTIN_SFENCE:
13831 emit_insn (gen_sfence ());
13834 case IX86_BUILTIN_PEXTRW:
13835 case IX86_BUILTIN_PEXTRW128:
13836 icode = (fcode == IX86_BUILTIN_PEXTRW
13837 ? CODE_FOR_mmx_pextrw
13838 : CODE_FOR_sse2_pextrw);
13839 arg0 = TREE_VALUE (arglist);
13840 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13841 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13842 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13843 tmode = insn_data[icode].operand[0].mode;
13844 mode0 = insn_data[icode].operand[1].mode;
13845 mode1 = insn_data[icode].operand[2].mode;
13847 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13848 op0 = copy_to_mode_reg (mode0, op0);
13849 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13851 error ("selector must be an integer constant in the range 0..%i",
13852 fcode == IX86_BUILTIN_PEXTRW ? 3:7);
13853 return gen_reg_rtx (tmode);
13856 || GET_MODE (target) != tmode
13857 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13858 target = gen_reg_rtx (tmode);
13859 pat = GEN_FCN (icode) (target, op0, op1);
13865 case IX86_BUILTIN_PINSRW:
13866 case IX86_BUILTIN_PINSRW128:
13867 icode = (fcode == IX86_BUILTIN_PINSRW
13868 ? CODE_FOR_mmx_pinsrw
13869 : CODE_FOR_sse2_pinsrw);
13870 arg0 = TREE_VALUE (arglist);
13871 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13872 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13873 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13874 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13875 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13876 tmode = insn_data[icode].operand[0].mode;
13877 mode0 = insn_data[icode].operand[1].mode;
13878 mode1 = insn_data[icode].operand[2].mode;
13879 mode2 = insn_data[icode].operand[3].mode;
13881 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13882 op0 = copy_to_mode_reg (mode0, op0);
13883 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13884 op1 = copy_to_mode_reg (mode1, op1);
13885 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13887 error ("selector must be an integer constant in the range 0..%i",
13888 fcode == IX86_BUILTIN_PINSRW ? 15:255);
13892 || GET_MODE (target) != tmode
13893 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13894 target = gen_reg_rtx (tmode);
13895 pat = GEN_FCN (icode) (target, op0, op1, op2);
13901 case IX86_BUILTIN_MASKMOVQ:
13902 case IX86_BUILTIN_MASKMOVDQU:
13903 icode = (fcode == IX86_BUILTIN_MASKMOVQ
13904 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
13905 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
13906 : CODE_FOR_sse2_maskmovdqu));
13907 /* Note the arg order is different from the operand order. */
13908 arg1 = TREE_VALUE (arglist);
13909 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
13910 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13911 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13912 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13913 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13914 mode0 = insn_data[icode].operand[0].mode;
13915 mode1 = insn_data[icode].operand[1].mode;
13916 mode2 = insn_data[icode].operand[2].mode;
13918 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13919 op0 = copy_to_mode_reg (mode0, op0);
13920 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13921 op1 = copy_to_mode_reg (mode1, op1);
13922 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
13923 op2 = copy_to_mode_reg (mode2, op2);
13924 pat = GEN_FCN (icode) (op0, op1, op2);
13930 case IX86_BUILTIN_SQRTSS:
13931 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
13932 case IX86_BUILTIN_RSQRTSS:
13933 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
13934 case IX86_BUILTIN_RCPSS:
13935 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
13937 case IX86_BUILTIN_LOADAPS:
13938 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
13940 case IX86_BUILTIN_LOADUPS:
13941 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
13943 case IX86_BUILTIN_STOREAPS:
13944 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
13946 case IX86_BUILTIN_STOREUPS:
13947 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
13949 case IX86_BUILTIN_LOADSS:
13950 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
13952 case IX86_BUILTIN_STORESS:
13953 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
13955 case IX86_BUILTIN_LOADHPS:
13956 case IX86_BUILTIN_LOADLPS:
13957 case IX86_BUILTIN_LOADHPD:
13958 case IX86_BUILTIN_LOADLPD:
13959 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
13960 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
13961 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
13962 : CODE_FOR_sse2_movlpd);
13963 arg0 = TREE_VALUE (arglist);
13964 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13965 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13966 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13967 tmode = insn_data[icode].operand[0].mode;
13968 mode0 = insn_data[icode].operand[1].mode;
13969 mode1 = insn_data[icode].operand[2].mode;
13971 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13972 op0 = copy_to_mode_reg (mode0, op0);
13973 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
13975 || GET_MODE (target) != tmode
13976 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13977 target = gen_reg_rtx (tmode);
13978 pat = GEN_FCN (icode) (target, op0, op1);
13984 case IX86_BUILTIN_STOREHPS:
13985 case IX86_BUILTIN_STORELPS:
13986 case IX86_BUILTIN_STOREHPD:
13987 case IX86_BUILTIN_STORELPD:
13988 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
13989 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
13990 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
13991 : CODE_FOR_sse2_movlpd);
13992 arg0 = TREE_VALUE (arglist);
13993 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13994 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13995 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13996 mode0 = insn_data[icode].operand[1].mode;
13997 mode1 = insn_data[icode].operand[2].mode;
13999 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14000 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14001 op1 = copy_to_mode_reg (mode1, op1);
14003 pat = GEN_FCN (icode) (op0, op0, op1);
14009 case IX86_BUILTIN_MOVNTPS:
14010 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
14011 case IX86_BUILTIN_MOVNTQ:
14012 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
14014 case IX86_BUILTIN_LDMXCSR:
14015 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14016 target = assign_386_stack_local (SImode, 0);
14017 emit_move_insn (target, op0);
14018 emit_insn (gen_ldmxcsr (target));
14021 case IX86_BUILTIN_STMXCSR:
14022 target = assign_386_stack_local (SImode, 0);
14023 emit_insn (gen_stmxcsr (target));
14024 return copy_to_mode_reg (SImode, target);
14026 case IX86_BUILTIN_SHUFPS:
14027 case IX86_BUILTIN_SHUFPD:
14028 icode = (fcode == IX86_BUILTIN_SHUFPS
14029 ? CODE_FOR_sse_shufps
14030 : CODE_FOR_sse2_shufpd);
14031 arg0 = TREE_VALUE (arglist);
14032 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14033 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14034 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14035 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14036 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14037 tmode = insn_data[icode].operand[0].mode;
14038 mode0 = insn_data[icode].operand[1].mode;
14039 mode1 = insn_data[icode].operand[2].mode;
14040 mode2 = insn_data[icode].operand[3].mode;
14042 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14043 op0 = copy_to_mode_reg (mode0, op0);
14044 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14045 op1 = copy_to_mode_reg (mode1, op1);
14046 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14048 /* @@@ better error message */
14049 error ("mask must be an immediate");
14050 return gen_reg_rtx (tmode);
14053 || GET_MODE (target) != tmode
14054 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14055 target = gen_reg_rtx (tmode);
14056 pat = GEN_FCN (icode) (target, op0, op1, op2);
14062 case IX86_BUILTIN_PSHUFW:
14063 case IX86_BUILTIN_PSHUFD:
14064 case IX86_BUILTIN_PSHUFHW:
14065 case IX86_BUILTIN_PSHUFLW:
14066 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14067 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14068 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14069 : CODE_FOR_mmx_pshufw);
14070 arg0 = TREE_VALUE (arglist);
14071 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14072 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14073 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14074 tmode = insn_data[icode].operand[0].mode;
14075 mode1 = insn_data[icode].operand[1].mode;
14076 mode2 = insn_data[icode].operand[2].mode;
14078 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14079 op0 = copy_to_mode_reg (mode1, op0);
14080 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14082 /* @@@ better error message */
14083 error ("mask must be an immediate");
14087 || GET_MODE (target) != tmode
14088 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14089 target = gen_reg_rtx (tmode);
14090 pat = GEN_FCN (icode) (target, op0, op1);
14096 case IX86_BUILTIN_PSLLDQI128:
14097 case IX86_BUILTIN_PSRLDQI128:
14098 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14099 : CODE_FOR_sse2_lshrti3);
14100 arg0 = TREE_VALUE (arglist);
14101 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14102 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14103 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14104 tmode = insn_data[icode].operand[0].mode;
14105 mode1 = insn_data[icode].operand[1].mode;
14106 mode2 = insn_data[icode].operand[2].mode;
14108 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14110 op0 = copy_to_reg (op0);
14111 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14113 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14115 error ("shift must be an immediate");
14118 target = gen_reg_rtx (V2DImode);
14119 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14125 case IX86_BUILTIN_FEMMS:
14126 emit_insn (gen_femms ());
14129 case IX86_BUILTIN_PAVGUSB:
14130 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
14132 case IX86_BUILTIN_PF2ID:
14133 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
14135 case IX86_BUILTIN_PFACC:
14136 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
14138 case IX86_BUILTIN_PFADD:
14139 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
14141 case IX86_BUILTIN_PFCMPEQ:
14142 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
14144 case IX86_BUILTIN_PFCMPGE:
14145 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
14147 case IX86_BUILTIN_PFCMPGT:
14148 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
14150 case IX86_BUILTIN_PFMAX:
14151 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
14153 case IX86_BUILTIN_PFMIN:
14154 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
14156 case IX86_BUILTIN_PFMUL:
14157 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
14159 case IX86_BUILTIN_PFRCP:
14160 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
14162 case IX86_BUILTIN_PFRCPIT1:
14163 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
14165 case IX86_BUILTIN_PFRCPIT2:
14166 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
14168 case IX86_BUILTIN_PFRSQIT1:
14169 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
14171 case IX86_BUILTIN_PFRSQRT:
14172 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
14174 case IX86_BUILTIN_PFSUB:
14175 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
14177 case IX86_BUILTIN_PFSUBR:
14178 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
14180 case IX86_BUILTIN_PI2FD:
14181 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
14183 case IX86_BUILTIN_PMULHRW:
14184 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
14186 case IX86_BUILTIN_PF2IW:
14187 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
14189 case IX86_BUILTIN_PFNACC:
14190 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
14192 case IX86_BUILTIN_PFPNACC:
14193 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
14195 case IX86_BUILTIN_PI2FW:
14196 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
14198 case IX86_BUILTIN_PSWAPDSI:
14199 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
14201 case IX86_BUILTIN_PSWAPDSF:
14202 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
14204 case IX86_BUILTIN_SSE_ZERO:
14205 target = gen_reg_rtx (V4SFmode);
14206 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
14209 case IX86_BUILTIN_MMX_ZERO:
14210 target = gen_reg_rtx (DImode);
14211 emit_insn (gen_mmx_clrdi (target));
14214 case IX86_BUILTIN_CLRTI:
14215 target = gen_reg_rtx (V2DImode);
14216 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
14220 case IX86_BUILTIN_SQRTSD:
14221 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14222 case IX86_BUILTIN_LOADAPD:
14223 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14224 case IX86_BUILTIN_LOADUPD:
14225 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14227 case IX86_BUILTIN_STOREAPD:
14228 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14229 case IX86_BUILTIN_STOREUPD:
14230 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14232 case IX86_BUILTIN_LOADSD:
14233 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14235 case IX86_BUILTIN_STORESD:
14236 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
14238 case IX86_BUILTIN_SETPD1:
14239 target = assign_386_stack_local (DFmode, 0);
14240 arg0 = TREE_VALUE (arglist);
14241 emit_move_insn (adjust_address (target, DFmode, 0),
14242 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14243 op0 = gen_reg_rtx (V2DFmode);
14244 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
14245 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
14248 case IX86_BUILTIN_SETPD:
14249 target = assign_386_stack_local (V2DFmode, 0);
14250 arg0 = TREE_VALUE (arglist);
14251 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14252 emit_move_insn (adjust_address (target, DFmode, 0),
14253 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14254 emit_move_insn (adjust_address (target, DFmode, 8),
14255 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14256 op0 = gen_reg_rtx (V2DFmode);
14257 emit_insn (gen_sse2_movapd (op0, target));
14260 case IX86_BUILTIN_LOADRPD:
14261 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14262 gen_reg_rtx (V2DFmode), 1);
14263 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
14266 case IX86_BUILTIN_LOADPD1:
14267 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14268 gen_reg_rtx (V2DFmode), 1);
14269 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14272 case IX86_BUILTIN_STOREPD1:
14273 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14274 case IX86_BUILTIN_STORERPD:
14275 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14277 case IX86_BUILTIN_CLRPD:
14278 target = gen_reg_rtx (V2DFmode);
14279 emit_insn (gen_sse_clrv2df (target));
14282 case IX86_BUILTIN_MFENCE:
14283 emit_insn (gen_sse2_mfence ());
14285 case IX86_BUILTIN_LFENCE:
14286 emit_insn (gen_sse2_lfence ());
14289 case IX86_BUILTIN_CLFLUSH:
14290 arg0 = TREE_VALUE (arglist);
14291 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14292 icode = CODE_FOR_sse2_clflush;
14293 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14294 op0 = copy_to_mode_reg (Pmode, op0);
14296 emit_insn (gen_sse2_clflush (op0));
14299 case IX86_BUILTIN_MOVNTPD:
14300 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14301 case IX86_BUILTIN_MOVNTDQ:
14302 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14303 case IX86_BUILTIN_MOVNTI:
14304 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14306 case IX86_BUILTIN_LOADDQA:
14307 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14308 case IX86_BUILTIN_LOADDQU:
14309 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14310 case IX86_BUILTIN_LOADD:
14311 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14313 case IX86_BUILTIN_STOREDQA:
14314 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14315 case IX86_BUILTIN_STOREDQU:
14316 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14317 case IX86_BUILTIN_STORED:
14318 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
14320 case IX86_BUILTIN_MONITOR:
14321 arg0 = TREE_VALUE (arglist);
14322 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14323 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14324 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14325 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14326 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14328 op0 = copy_to_mode_reg (SImode, op0);
14330 op1 = copy_to_mode_reg (SImode, op1);
14332 op2 = copy_to_mode_reg (SImode, op2);
14333 emit_insn (gen_monitor (op0, op1, op2));
14336 case IX86_BUILTIN_MWAIT:
14337 arg0 = TREE_VALUE (arglist);
14338 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14339 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14340 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14342 op0 = copy_to_mode_reg (SImode, op0);
14344 op1 = copy_to_mode_reg (SImode, op1);
14345 emit_insn (gen_mwait (op0, op1));
14348 case IX86_BUILTIN_LOADDDUP:
14349 return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
14351 case IX86_BUILTIN_LDDQU:
14352 return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
14359 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14360 if (d->code == fcode)
14362 /* Compares are treated specially. */
14363 if (d->icode == CODE_FOR_maskcmpv4sf3
14364 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14365 || d->icode == CODE_FOR_maskncmpv4sf3
14366 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14367 || d->icode == CODE_FOR_maskcmpv2df3
14368 || d->icode == CODE_FOR_vmmaskcmpv2df3
14369 || d->icode == CODE_FOR_maskncmpv2df3
14370 || d->icode == CODE_FOR_vmmaskncmpv2df3)
14371 return ix86_expand_sse_compare (d, arglist, target);
14373 return ix86_expand_binop_builtin (d->icode, arglist, target);
14376 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14377 if (d->code == fcode)
14378 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14380 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14381 if (d->code == fcode)
14382 return ix86_expand_sse_comi (d, arglist, target);
14384 /* @@@ Should really do something sensible here. */
14388 /* Store OPERAND to the memory after reload is completed. This means
14389 that we can't easily use assign_stack_local. */
14391 ix86_force_to_memory (enum machine_mode mode, rtx operand)
14394 if (!reload_completed)
14396 if (TARGET_RED_ZONE)
14398 result = gen_rtx_MEM (mode,
14399 gen_rtx_PLUS (Pmode,
14401 GEN_INT (-RED_ZONE_SIZE)));
14402 emit_move_insn (result, operand);
14404 else if (!TARGET_RED_ZONE && TARGET_64BIT)
14410 operand = gen_lowpart (DImode, operand);
14414 gen_rtx_SET (VOIDmode,
14415 gen_rtx_MEM (DImode,
14416 gen_rtx_PRE_DEC (DImode,
14417 stack_pointer_rtx)),
14423 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14432 split_di (&operand, 1, operands, operands + 1);
14434 gen_rtx_SET (VOIDmode,
14435 gen_rtx_MEM (SImode,
14436 gen_rtx_PRE_DEC (Pmode,
14437 stack_pointer_rtx)),
14440 gen_rtx_SET (VOIDmode,
14441 gen_rtx_MEM (SImode,
14442 gen_rtx_PRE_DEC (Pmode,
14443 stack_pointer_rtx)),
14448 /* It is better to store HImodes as SImodes. */
14449 if (!TARGET_PARTIAL_REG_STALL)
14450 operand = gen_lowpart (SImode, operand);
14454 gen_rtx_SET (VOIDmode,
14455 gen_rtx_MEM (GET_MODE (operand),
14456 gen_rtx_PRE_DEC (SImode,
14457 stack_pointer_rtx)),
14463 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14468 /* Free operand from the memory. */
14470 ix86_free_from_memory (enum machine_mode mode)
14472 if (!TARGET_RED_ZONE)
14476 if (mode == DImode || TARGET_64BIT)
14478 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14482 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14483 to pop or add instruction if registers are available. */
14484 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14485 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14490 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14491 QImode must go into class Q_REGS.
14492 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
14493 movdf to do mem-to-mem moves through integer regs. */
14495 ix86_preferred_reload_class (rtx x, enum reg_class class)
14497 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14499 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14501 /* SSE can't load any constant directly yet. */
14502 if (SSE_CLASS_P (class))
14504 /* Floats can load 0 and 1. */
14505 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14507 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14508 if (MAYBE_SSE_CLASS_P (class))
14509 return (reg_class_subset_p (class, GENERAL_REGS)
14510 ? GENERAL_REGS : FLOAT_REGS);
14514 /* General regs can load everything. */
14515 if (reg_class_subset_p (class, GENERAL_REGS))
14516 return GENERAL_REGS;
14517 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14518 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14521 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14523 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14528 /* If we are copying between general and FP registers, we need a memory
14529 location. The same is true for SSE and MMX registers.
14531 The macro can't work reliably when one of the CLASSES is class containing
14532 registers from multiple units (SSE, MMX, integer). We avoid this by never
14533 combining those units in single alternative in the machine description.
14534 Ensure that this constraint holds to avoid unexpected surprises.
14536 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14537 enforce these sanity checks. */
14539 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
14540 enum machine_mode mode, int strict)
14542 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14543 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14544 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14545 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14546 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14547 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14554 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14555 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14556 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14557 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14558 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
14560 /* Return the cost of moving data from a register in class CLASS1 to
14561 one in class CLASS2.
14563 It is not required that the cost always equal 2 when FROM is the same as TO;
14564 on some machines it is expensive to move between registers if they are not
14565 general registers. */
14567 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
14568 enum reg_class class2)
14570 /* In case we require secondary memory, compute cost of the store followed
14571 by load. In order to avoid bad register allocation choices, we need
14572 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14574 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14578 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14579 MEMORY_MOVE_COST (mode, class1, 1));
14580 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14581 MEMORY_MOVE_COST (mode, class2, 1));
14583 /* In case of copying from general_purpose_register we may emit multiple
14584 stores followed by single load causing memory size mismatch stall.
14585 Count this as arbitrarily high cost of 20. */
14586 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14589 /* In the case of FP/MMX moves, the registers actually overlap, and we
14590 have to switch modes in order to treat them differently. */
14591 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14592 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14598 /* Moves between SSE/MMX and integer unit are expensive. */
14599 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14600 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14601 return ix86_cost->mmxsse_to_integer;
14602 if (MAYBE_FLOAT_CLASS_P (class1))
14603 return ix86_cost->fp_move;
14604 if (MAYBE_SSE_CLASS_P (class1))
14605 return ix86_cost->sse_move;
14606 if (MAYBE_MMX_CLASS_P (class1))
14607 return ix86_cost->mmx_move;
14611 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14613 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
14615 /* Flags and only flags can only hold CCmode values. */
14616 if (CC_REGNO_P (regno))
14617 return GET_MODE_CLASS (mode) == MODE_CC;
14618 if (GET_MODE_CLASS (mode) == MODE_CC
14619 || GET_MODE_CLASS (mode) == MODE_RANDOM
14620 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14622 if (FP_REGNO_P (regno))
14623 return VALID_FP_MODE_P (mode);
14624 if (SSE_REGNO_P (regno))
14625 return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
14626 if (MMX_REGNO_P (regno))
14628 ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
14629 /* We handle both integer and floats in the general purpose registers.
14630 In future we should be able to handle vector modes as well. */
14631 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14633 /* Take care for QImode values - they can be in non-QI regs, but then
14634 they do cause partial register stalls. */
14635 if (regno < 4 || mode != QImode || TARGET_64BIT)
14637 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14640 /* Return the cost of moving data of mode M between a
14641 register and memory. A value of 2 is the default; this cost is
14642 relative to those in `REGISTER_MOVE_COST'.
14644 If moving between registers and memory is more expensive than
14645 between two registers, you should define this macro to express the
14648 Model also increased moving costs of QImode registers in non
14652 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
14654 if (FLOAT_CLASS_P (class))
14672 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14674 if (SSE_CLASS_P (class))
14677 switch (GET_MODE_SIZE (mode))
14691 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14693 if (MMX_CLASS_P (class))
14696 switch (GET_MODE_SIZE (mode))
14707 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14709 switch (GET_MODE_SIZE (mode))
14713 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14714 : ix86_cost->movzbl_load);
14716 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14717 : ix86_cost->int_store[0] + 4);
14720 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14722 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14723 if (mode == TFmode)
14725 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14726 * (((int) GET_MODE_SIZE (mode)
14727 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
14731 /* Compute a (partial) cost for rtx X. Return true if the complete
14732 cost has been computed, and false if subexpressions should be
14733 scanned. In either case, *TOTAL contains the cost result. */
14736 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
14738 enum machine_mode mode = GET_MODE (x);
14746 if (TARGET_64BIT && !x86_64_sign_extended_value (x))
14748 else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
14750 else if (flag_pic && SYMBOLIC_CONST (x)
14752 || (!GET_CODE (x) != LABEL_REF
14753 && (GET_CODE (x) != SYMBOL_REF
14754 || !SYMBOL_REF_LOCAL_P (x)))))
14761 if (mode == VOIDmode)
14764 switch (standard_80387_constant_p (x))
14769 default: /* Other constants */
14774 /* Start with (MEM (SYMBOL_REF)), since that's where
14775 it'll probably end up. Add a penalty for size. */
14776 *total = (COSTS_N_INSNS (1)
14777 + (flag_pic != 0 && !TARGET_64BIT)
14778 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
14784 /* The zero extensions is often completely free on x86_64, so make
14785 it as cheap as possible. */
14786 if (TARGET_64BIT && mode == DImode
14787 && GET_MODE (XEXP (x, 0)) == SImode)
14789 else if (TARGET_ZERO_EXTEND_WITH_AND)
14790 *total = COSTS_N_INSNS (ix86_cost->add);
14792 *total = COSTS_N_INSNS (ix86_cost->movzx);
14796 *total = COSTS_N_INSNS (ix86_cost->movsx);
14800 if (GET_CODE (XEXP (x, 1)) == CONST_INT
14801 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
14803 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14806 *total = COSTS_N_INSNS (ix86_cost->add);
14809 if ((value == 2 || value == 3)
14810 && !TARGET_DECOMPOSE_LEA
14811 && ix86_cost->lea <= ix86_cost->shift_const)
14813 *total = COSTS_N_INSNS (ix86_cost->lea);
14823 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
14825 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14827 if (INTVAL (XEXP (x, 1)) > 32)
14828 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
14830 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
14834 if (GET_CODE (XEXP (x, 1)) == AND)
14835 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
14837 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
14842 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14843 *total = COSTS_N_INSNS (ix86_cost->shift_const);
14845 *total = COSTS_N_INSNS (ix86_cost->shift_var);
14850 if (FLOAT_MODE_P (mode))
14851 *total = COSTS_N_INSNS (ix86_cost->fmul);
14852 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14854 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14857 for (nbits = 0; value != 0; value >>= 1)
14860 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
14861 + nbits * ix86_cost->mult_bit);
14865 /* This is arbitrary */
14866 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
14867 + 7 * ix86_cost->mult_bit);
14875 if (FLOAT_MODE_P (mode))
14876 *total = COSTS_N_INSNS (ix86_cost->fdiv);
14878 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
14882 if (FLOAT_MODE_P (mode))
14883 *total = COSTS_N_INSNS (ix86_cost->fadd);
14884 else if (!TARGET_DECOMPOSE_LEA
14885 && GET_MODE_CLASS (mode) == MODE_INT
14886 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
14888 if (GET_CODE (XEXP (x, 0)) == PLUS
14889 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14890 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
14891 && CONSTANT_P (XEXP (x, 1)))
14893 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
14894 if (val == 2 || val == 4 || val == 8)
14896 *total = COSTS_N_INSNS (ix86_cost->lea);
14897 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
14898 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
14900 *total += rtx_cost (XEXP (x, 1), outer_code);
14904 else if (GET_CODE (XEXP (x, 0)) == MULT
14905 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
14907 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
14908 if (val == 2 || val == 4 || val == 8)
14910 *total = COSTS_N_INSNS (ix86_cost->lea);
14911 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
14912 *total += rtx_cost (XEXP (x, 1), outer_code);
14916 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14918 *total = COSTS_N_INSNS (ix86_cost->lea);
14919 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
14920 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
14921 *total += rtx_cost (XEXP (x, 1), outer_code);
14928 if (FLOAT_MODE_P (mode))
14930 *total = COSTS_N_INSNS (ix86_cost->fadd);
14938 if (!TARGET_64BIT && mode == DImode)
14940 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
14941 + (rtx_cost (XEXP (x, 0), outer_code)
14942 << (GET_MODE (XEXP (x, 0)) != DImode))
14943 + (rtx_cost (XEXP (x, 1), outer_code)
14944 << (GET_MODE (XEXP (x, 1)) != DImode)));
14950 if (FLOAT_MODE_P (mode))
14952 *total = COSTS_N_INSNS (ix86_cost->fchs);
14958 if (!TARGET_64BIT && mode == DImode)
14959 *total = COSTS_N_INSNS (ix86_cost->add * 2);
14961 *total = COSTS_N_INSNS (ix86_cost->add);
14965 if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
14970 if (FLOAT_MODE_P (mode))
14971 *total = COSTS_N_INSNS (ix86_cost->fabs);
14975 if (FLOAT_MODE_P (mode))
14976 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
14980 if (XINT (x, 1) == UNSPEC_TP)
14989 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
14991 ix86_svr3_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
14994 fputs ("\tpushl $", asm_out_file);
14995 assemble_name (asm_out_file, XSTR (symbol, 0));
14996 fputc ('\n', asm_out_file);
15002 static int current_machopic_label_num;
15004 /* Given a symbol name and its associated stub, write out the
15005 definition of the stub. */
15008 machopic_output_stub (FILE *file, const char *symb, const char *stub)
15010 unsigned int length;
15011 char *binder_name, *symbol_name, lazy_ptr_name[32];
15012 int label = ++current_machopic_label_num;
15014 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
15015 symb = (*targetm.strip_name_encoding) (symb);
15017 length = strlen (stub);
15018 binder_name = alloca (length + 32);
15019 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
15021 length = strlen (symb);
15022 symbol_name = alloca (length + 32);
15023 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
15025 sprintf (lazy_ptr_name, "L%d$lz", label);
15028 machopic_picsymbol_stub_section ();
15030 machopic_symbol_stub_section ();
15032 fprintf (file, "%s:\n", stub);
15033 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15037 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
15038 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
15039 fprintf (file, "\tjmp %%edx\n");
15042 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
15044 fprintf (file, "%s:\n", binder_name);
15048 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
15049 fprintf (file, "\tpushl %%eax\n");
15052 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
15054 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
15056 machopic_lazy_symbol_ptr_section ();
15057 fprintf (file, "%s:\n", lazy_ptr_name);
15058 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15059 fprintf (file, "\t.long %s\n", binder_name);
15061 #endif /* TARGET_MACHO */
15063 /* Order the registers for register allocator. */
15066 x86_order_regs_for_local_alloc (void)
15071 /* First allocate the local general purpose registers. */
15072 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15073 if (GENERAL_REGNO_P (i) && call_used_regs[i])
15074 reg_alloc_order [pos++] = i;
15076 /* Global general purpose registers. */
15077 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15078 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
15079 reg_alloc_order [pos++] = i;
15081 /* x87 registers come first in case we are doing FP math
15083 if (!TARGET_SSE_MATH)
15084 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15085 reg_alloc_order [pos++] = i;
15087 /* SSE registers. */
15088 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
15089 reg_alloc_order [pos++] = i;
15090 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
15091 reg_alloc_order [pos++] = i;
15093 /* x87 registers. */
15094 if (TARGET_SSE_MATH)
15095 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15096 reg_alloc_order [pos++] = i;
15098 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
15099 reg_alloc_order [pos++] = i;
15101 /* Initialize the rest of array as we do not allocate some registers
15103 while (pos < FIRST_PSEUDO_REGISTER)
15104 reg_alloc_order [pos++] = 0;
15107 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15108 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
15111 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15112 struct attribute_spec.handler. */
15114 ix86_handle_struct_attribute (tree *node, tree name,
15115 tree args ATTRIBUTE_UNUSED,
15116 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
15119 if (DECL_P (*node))
15121 if (TREE_CODE (*node) == TYPE_DECL)
15122 type = &TREE_TYPE (*node);
15127 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15128 || TREE_CODE (*type) == UNION_TYPE)))
15130 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
15131 *no_add_attrs = true;
15134 else if ((is_attribute_p ("ms_struct", name)
15135 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15136 || ((is_attribute_p ("gcc_struct", name)
15137 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15139 warning ("`%s' incompatible attribute ignored",
15140 IDENTIFIER_POINTER (name));
15141 *no_add_attrs = true;
15148 ix86_ms_bitfield_layout_p (tree record_type)
15150 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
15151 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
15152 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
15155 /* Returns an expression indicating where the this parameter is
15156 located on entry to the FUNCTION. */
15159 x86_this_parameter (tree function)
15161 tree type = TREE_TYPE (function);
15165 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
15166 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
15169 if (ix86_function_regparm (type, function) > 0)
15173 parm = TYPE_ARG_TYPES (type);
15174 /* Figure out whether or not the function has a variable number of
15176 for (; parm; parm = TREE_CHAIN (parm))
15177 if (TREE_VALUE (parm) == void_type_node)
15179 /* If not, the this parameter is in the first argument. */
15183 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
15185 return gen_rtx_REG (SImode, 0);
15189 if (aggregate_value_p (TREE_TYPE (type), type))
15190 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
15192 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
15195 /* Determine whether x86_output_mi_thunk can succeed. */
15198 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
15199 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
15200 HOST_WIDE_INT vcall_offset, tree function)
15202 /* 64-bit can handle anything. */
15206 /* For 32-bit, everything's fine if we have one free register. */
15207 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
15210 /* Need a free register for vcall_offset. */
15214 /* Need a free register for GOT references. */
15215 if (flag_pic && !(*targetm.binds_local_p) (function))
15218 /* Otherwise ok. */
15222 /* Output the assembler code for a thunk function. THUNK_DECL is the
15223 declaration for the thunk function itself, FUNCTION is the decl for
15224 the target function. DELTA is an immediate constant offset to be
15225 added to THIS. If VCALL_OFFSET is nonzero, the word at
15226 *(*this + vcall_offset) should be added to THIS. */
15229 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
15230 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
15231 HOST_WIDE_INT vcall_offset, tree function)
15234 rtx this = x86_this_parameter (function);
15237 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15238 pull it in now and let DELTA benefit. */
15241 else if (vcall_offset)
15243 /* Put the this parameter into %eax. */
15245 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15246 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15249 this_reg = NULL_RTX;
15251 /* Adjust the this parameter by a fixed constant. */
15254 xops[0] = GEN_INT (delta);
15255 xops[1] = this_reg ? this_reg : this;
15258 if (!x86_64_general_operand (xops[0], DImode))
15260 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15262 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15266 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15269 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15272 /* Adjust the this parameter by a value stored in the vtable. */
15276 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15279 int tmp_regno = 2 /* ECX */;
15280 if (lookup_attribute ("fastcall",
15281 TYPE_ATTRIBUTES (TREE_TYPE (function))))
15282 tmp_regno = 0 /* EAX */;
15283 tmp = gen_rtx_REG (SImode, tmp_regno);
15286 xops[0] = gen_rtx_MEM (Pmode, this_reg);
15289 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15291 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15293 /* Adjust the this parameter. */
15294 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
15295 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15297 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15298 xops[0] = GEN_INT (vcall_offset);
15300 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15301 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
15303 xops[1] = this_reg;
15305 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15307 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15310 /* If necessary, drop THIS back to its stack slot. */
15311 if (this_reg && this_reg != this)
15313 xops[0] = this_reg;
15315 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15318 xops[0] = XEXP (DECL_RTL (function), 0);
15321 if (!flag_pic || (*targetm.binds_local_p) (function))
15322 output_asm_insn ("jmp\t%P0", xops);
15325 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
15326 tmp = gen_rtx_CONST (Pmode, tmp);
15327 tmp = gen_rtx_MEM (QImode, tmp);
15329 output_asm_insn ("jmp\t%A0", xops);
15334 if (!flag_pic || (*targetm.binds_local_p) (function))
15335 output_asm_insn ("jmp\t%P0", xops);
15340 char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
15341 tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
15342 tmp = gen_rtx_MEM (QImode, tmp);
15344 output_asm_insn ("jmp\t%0", xops);
15347 #endif /* TARGET_MACHO */
15349 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15350 output_set_got (tmp);
15353 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15354 output_asm_insn ("jmp\t{*}%1", xops);
15360 x86_file_start (void)
15362 default_file_start ();
15363 if (X86_FILE_START_VERSION_DIRECTIVE)
15364 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
15365 if (X86_FILE_START_FLTUSED)
15366 fputs ("\t.global\t__fltused\n", asm_out_file);
15367 if (ix86_asm_dialect == ASM_INTEL)
15368 fputs ("\t.intel_syntax\n", asm_out_file);
15372 x86_field_alignment (tree field, int computed)
15374 enum machine_mode mode;
15375 tree type = TREE_TYPE (field);
15377 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
15379 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15380 ? get_inner_array_type (type) : type);
15381 if (mode == DFmode || mode == DCmode
15382 || GET_MODE_CLASS (mode) == MODE_INT
15383 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
15384 return MIN (32, computed);
15388 /* Output assembler code to FILE to increment profiler label # LABELNO
15389 for profiling a function entry. */
15391 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
15396 #ifndef NO_PROFILE_COUNTERS
15397 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
15399 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
15403 #ifndef NO_PROFILE_COUNTERS
15404 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15406 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15410 #ifndef NO_PROFILE_COUNTERS
15411 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15412 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15414 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
15418 #ifndef NO_PROFILE_COUNTERS
15419 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
15420 PROFILE_COUNT_REGISTER);
15422 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15426 /* We don't have exact information about the insn sizes, but we may assume
15427 quite safely that we are informed about all 1 byte insns and memory
15428 address sizes. This is enough to eliminate unnecessary padding in
15432 min_insn_size (rtx insn)
15436 if (!INSN_P (insn) || !active_insn_p (insn))
15439 /* Discard alignments we've emit and jump instructions. */
15440 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
15441 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
15443 if (GET_CODE (insn) == JUMP_INSN
15444 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
15445 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
15448 /* Important case - calls are always 5 bytes.
15449 It is common to have many calls in the row. */
15450 if (GET_CODE (insn) == CALL_INSN
15451 && symbolic_reference_mentioned_p (PATTERN (insn))
15452 && !SIBLING_CALL_P (insn))
15454 if (get_attr_length (insn) <= 1)
15457 /* For normal instructions we may rely on the sizes of addresses
15458 and the presence of symbol to require 4 bytes of encoding.
15459 This is not the case for jumps where references are PC relative. */
15460 if (GET_CODE (insn) != JUMP_INSN)
15462 l = get_attr_length_address (insn);
15463 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
15472 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
15476 k8_avoid_jump_misspredicts (void)
15478 rtx insn, start = get_insns ();
15479 int nbytes = 0, njumps = 0;
15482 /* Look for all minimal intervals of instructions containing 4 jumps.
15483 The intervals are bounded by START and INSN. NBYTES is the total
15484 size of instructions in the interval including INSN and not including
15485 START. When the NBYTES is smaller than 16 bytes, it is possible
15486 that the end of START and INSN ends up in the same 16byte page.
15488 The smallest offset in the page INSN can start is the case where START
15489 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
15490 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
15492 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
15495 nbytes += min_insn_size (insn);
15497 fprintf(rtl_dump_file, "Insn %i estimated to %i bytes\n",
15498 INSN_UID (insn), min_insn_size (insn));
15499 if ((GET_CODE (insn) == JUMP_INSN
15500 && GET_CODE (PATTERN (insn)) != ADDR_VEC
15501 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
15502 || GET_CODE (insn) == CALL_INSN)
15509 start = NEXT_INSN (start);
15510 if ((GET_CODE (start) == JUMP_INSN
15511 && GET_CODE (PATTERN (start)) != ADDR_VEC
15512 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
15513 || GET_CODE (start) == CALL_INSN)
15514 njumps--, isjump = 1;
15517 nbytes -= min_insn_size (start);
15522 fprintf(rtl_dump_file, "Interval %i to %i has %i bytes\n",
15523 INSN_UID (start), INSN_UID (insn), nbytes);
15525 if (njumps == 3 && isjump && nbytes < 16)
15527 int padsize = 15 - nbytes + min_insn_size (insn);
15530 fprintf (rtl_dump_file, "Padding insn %i by %i bytes!\n", INSN_UID (insn), padsize);
15531 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
15536 /* Implement machine specific optimizations.
15537 At the moment we implement single transformation: AMD Athlon works faster
15538 when RET is not destination of conditional jump or directly preceded
15539 by other jump instruction. We avoid the penalty by inserting NOP just
15540 before the RET instructions in such cases. */
15546 if (!TARGET_ATHLON_K8 || !optimize || optimize_size)
15548 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
15550 basic_block bb = e->src;
15553 bool replace = false;
15555 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
15556 || !maybe_hot_bb_p (bb))
15558 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15559 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
15561 if (prev && GET_CODE (prev) == CODE_LABEL)
15564 for (e = bb->pred; e; e = e->pred_next)
15565 if (EDGE_FREQUENCY (e) && e->src->index >= 0
15566 && !(e->flags & EDGE_FALLTHRU))
15571 prev = prev_active_insn (ret);
15573 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
15574 || GET_CODE (prev) == CALL_INSN))
15576 /* Empty functions get branch mispredict even when the jump destination
15577 is not visible to us. */
15578 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
15583 emit_insn_before (gen_return_internal_long (), ret);
15587 k8_avoid_jump_misspredicts ();
15590 /* Return nonzero when QImode register that must be represented via REX prefix
15593 x86_extended_QIreg_mentioned_p (rtx insn)
15596 extract_insn_cached (insn);
15597 for (i = 0; i < recog_data.n_operands; i++)
15598 if (REG_P (recog_data.operand[i])
15599 && REGNO (recog_data.operand[i]) >= 4)
15604 /* Return nonzero when P points to register encoded via REX prefix.
15605 Called via for_each_rtx. */
15607 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
15609 unsigned int regno;
15612 regno = REGNO (*p);
15613 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15616 /* Return true when INSN mentions register that must be encoded using REX
15619 x86_extended_reg_mentioned_p (rtx insn)
15621 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15624 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
15625 optabs would emit if we didn't have TFmode patterns. */
15628 x86_emit_floatuns (rtx operands[2])
15630 rtx neglab, donelab, i0, i1, f0, in, out;
15631 enum machine_mode mode, inmode;
15633 inmode = GET_MODE (operands[1]);
15634 if (inmode != SImode
15635 && inmode != DImode)
15639 in = force_reg (inmode, operands[1]);
15640 mode = GET_MODE (out);
15641 neglab = gen_label_rtx ();
15642 donelab = gen_label_rtx ();
15643 i1 = gen_reg_rtx (Pmode);
15644 f0 = gen_reg_rtx (mode);
15646 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
15648 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15649 emit_jump_insn (gen_jump (donelab));
15652 emit_label (neglab);
15654 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15655 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15656 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15657 expand_float (f0, i0, 0);
15658 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15660 emit_label (donelab);
15663 /* Return if we do not know how to pass TYPE solely in registers. */
15665 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
15667 if (default_must_pass_in_stack (mode, type))
15669 return (!TARGET_64BIT && type && mode == TImode);
15672 #include "gt-i386.h"