1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-attr.h"
43 #include "basic-block.h"
46 #include "target-def.h"
47 #include "langhooks.h"
50 #ifndef CHECK_STACK_LIMIT
51 #define CHECK_STACK_LIMIT (-1)
54 /* Return index of given mode in mult and division cost tables. */
55 #define MODE_INDEX(mode) \
56 ((mode) == QImode ? 0 \
57 : (mode) == HImode ? 1 \
58 : (mode) == SImode ? 2 \
59 : (mode) == DImode ? 3 \
62 /* Processor costs (relative to an add) */
64 struct processor_costs size_cost = { /* costs for tunning for size */
65 2, /* cost of an add instruction */
66 3, /* cost of a lea instruction */
67 2, /* variable shift costs */
68 3, /* constant shift costs */
69 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
70 0, /* cost of multiply per each bit set */
71 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
72 3, /* cost of movsx */
73 3, /* cost of movzx */
76 2, /* cost for loading QImode using movzbl */
77 {2, 2, 2}, /* cost of loading integer registers
78 in QImode, HImode and SImode.
79 Relative to reg-reg move (2). */
80 {2, 2, 2}, /* cost of storing integer registers */
81 2, /* cost of reg,reg fld/fst */
82 {2, 2, 2}, /* cost of loading fp registers
83 in SFmode, DFmode and XFmode */
84 {2, 2, 2}, /* cost of loading integer registers */
85 3, /* cost of moving MMX register */
86 {3, 3}, /* cost of loading MMX registers
87 in SImode and DImode */
88 {3, 3}, /* cost of storing MMX registers
89 in SImode and DImode */
90 3, /* cost of moving SSE register */
91 {3, 3, 3}, /* cost of loading SSE registers
92 in SImode, DImode and TImode */
93 {3, 3, 3}, /* cost of storing SSE registers
94 in SImode, DImode and TImode */
95 3, /* MMX or SSE register to integer */
96 0, /* size of prefetch block */
97 0, /* number of parallel prefetches */
99 2, /* cost of FADD and FSUB insns. */
100 2, /* cost of FMUL instruction. */
101 2, /* cost of FDIV instruction. */
102 2, /* cost of FABS instruction. */
103 2, /* cost of FCHS instruction. */
104 2, /* cost of FSQRT instruction. */
107 /* Processor costs (relative to an add) */
109 struct processor_costs i386_cost = { /* 386 specific costs */
110 1, /* cost of an add instruction */
111 1, /* cost of a lea instruction */
112 3, /* variable shift costs */
113 2, /* constant shift costs */
114 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
115 1, /* cost of multiply per each bit set */
116 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
117 3, /* cost of movsx */
118 2, /* cost of movzx */
119 15, /* "large" insn */
121 4, /* cost for loading QImode using movzbl */
122 {2, 4, 2}, /* cost of loading integer registers
123 in QImode, HImode and SImode.
124 Relative to reg-reg move (2). */
125 {2, 4, 2}, /* cost of storing integer registers */
126 2, /* cost of reg,reg fld/fst */
127 {8, 8, 8}, /* cost of loading fp registers
128 in SFmode, DFmode and XFmode */
129 {8, 8, 8}, /* cost of loading integer registers */
130 2, /* cost of moving MMX register */
131 {4, 8}, /* cost of loading MMX registers
132 in SImode and DImode */
133 {4, 8}, /* cost of storing MMX registers
134 in SImode and DImode */
135 2, /* cost of moving SSE register */
136 {4, 8, 16}, /* cost of loading SSE registers
137 in SImode, DImode and TImode */
138 {4, 8, 16}, /* cost of storing SSE registers
139 in SImode, DImode and TImode */
140 3, /* MMX or SSE register to integer */
141 0, /* size of prefetch block */
142 0, /* number of parallel prefetches */
144 23, /* cost of FADD and FSUB insns. */
145 27, /* cost of FMUL instruction. */
146 88, /* cost of FDIV instruction. */
147 22, /* cost of FABS instruction. */
148 24, /* cost of FCHS instruction. */
149 122, /* cost of FSQRT instruction. */
153 struct processor_costs i486_cost = { /* 486 specific costs */
154 1, /* cost of an add instruction */
155 1, /* cost of a lea instruction */
156 3, /* variable shift costs */
157 2, /* constant shift costs */
158 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
159 1, /* cost of multiply per each bit set */
160 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
161 3, /* cost of movsx */
162 2, /* cost of movzx */
163 15, /* "large" insn */
165 4, /* cost for loading QImode using movzbl */
166 {2, 4, 2}, /* cost of loading integer registers
167 in QImode, HImode and SImode.
168 Relative to reg-reg move (2). */
169 {2, 4, 2}, /* cost of storing integer registers */
170 2, /* cost of reg,reg fld/fst */
171 {8, 8, 8}, /* cost of loading fp registers
172 in SFmode, DFmode and XFmode */
173 {8, 8, 8}, /* cost of loading integer registers */
174 2, /* cost of moving MMX register */
175 {4, 8}, /* cost of loading MMX registers
176 in SImode and DImode */
177 {4, 8}, /* cost of storing MMX registers
178 in SImode and DImode */
179 2, /* cost of moving SSE register */
180 {4, 8, 16}, /* cost of loading SSE registers
181 in SImode, DImode and TImode */
182 {4, 8, 16}, /* cost of storing SSE registers
183 in SImode, DImode and TImode */
184 3, /* MMX or SSE register to integer */
185 0, /* size of prefetch block */
186 0, /* number of parallel prefetches */
188 8, /* cost of FADD and FSUB insns. */
189 16, /* cost of FMUL instruction. */
190 73, /* cost of FDIV instruction. */
191 3, /* cost of FABS instruction. */
192 3, /* cost of FCHS instruction. */
193 83, /* cost of FSQRT instruction. */
197 struct processor_costs pentium_cost = {
198 1, /* cost of an add instruction */
199 1, /* cost of a lea instruction */
200 4, /* variable shift costs */
201 1, /* constant shift costs */
202 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
203 0, /* cost of multiply per each bit set */
204 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
205 3, /* cost of movsx */
206 2, /* cost of movzx */
207 8, /* "large" insn */
209 6, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {2, 2, 6}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {4, 4, 6}, /* cost of loading integer registers */
218 8, /* cost of moving MMX register */
219 {8, 8}, /* cost of loading MMX registers
220 in SImode and DImode */
221 {8, 8}, /* cost of storing MMX registers
222 in SImode and DImode */
223 2, /* cost of moving SSE register */
224 {4, 8, 16}, /* cost of loading SSE registers
225 in SImode, DImode and TImode */
226 {4, 8, 16}, /* cost of storing SSE registers
227 in SImode, DImode and TImode */
228 3, /* MMX or SSE register to integer */
229 0, /* size of prefetch block */
230 0, /* number of parallel prefetches */
232 3, /* cost of FADD and FSUB insns. */
233 3, /* cost of FMUL instruction. */
234 39, /* cost of FDIV instruction. */
235 1, /* cost of FABS instruction. */
236 1, /* cost of FCHS instruction. */
237 70, /* cost of FSQRT instruction. */
241 struct processor_costs pentiumpro_cost = {
242 1, /* cost of an add instruction */
243 1, /* cost of a lea instruction */
244 1, /* variable shift costs */
245 1, /* constant shift costs */
246 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
247 0, /* cost of multiply per each bit set */
248 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
249 1, /* cost of movsx */
250 1, /* cost of movzx */
251 8, /* "large" insn */
253 2, /* cost for loading QImode using movzbl */
254 {4, 4, 4}, /* cost of loading integer registers
255 in QImode, HImode and SImode.
256 Relative to reg-reg move (2). */
257 {2, 2, 2}, /* cost of storing integer registers */
258 2, /* cost of reg,reg fld/fst */
259 {2, 2, 6}, /* cost of loading fp registers
260 in SFmode, DFmode and XFmode */
261 {4, 4, 6}, /* cost of loading integer registers */
262 2, /* cost of moving MMX register */
263 {2, 2}, /* cost of loading MMX registers
264 in SImode and DImode */
265 {2, 2}, /* cost of storing MMX registers
266 in SImode and DImode */
267 2, /* cost of moving SSE register */
268 {2, 2, 8}, /* cost of loading SSE registers
269 in SImode, DImode and TImode */
270 {2, 2, 8}, /* cost of storing SSE registers
271 in SImode, DImode and TImode */
272 3, /* MMX or SSE register to integer */
273 32, /* size of prefetch block */
274 6, /* number of parallel prefetches */
276 3, /* cost of FADD and FSUB insns. */
277 5, /* cost of FMUL instruction. */
278 56, /* cost of FDIV instruction. */
279 2, /* cost of FABS instruction. */
280 2, /* cost of FCHS instruction. */
281 56, /* cost of FSQRT instruction. */
285 struct processor_costs k6_cost = {
286 1, /* cost of an add instruction */
287 2, /* cost of a lea instruction */
288 1, /* variable shift costs */
289 1, /* constant shift costs */
290 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
291 0, /* cost of multiply per each bit set */
292 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
293 2, /* cost of movsx */
294 2, /* cost of movzx */
295 8, /* "large" insn */
297 3, /* cost for loading QImode using movzbl */
298 {4, 5, 4}, /* cost of loading integer registers
299 in QImode, HImode and SImode.
300 Relative to reg-reg move (2). */
301 {2, 3, 2}, /* cost of storing integer registers */
302 4, /* cost of reg,reg fld/fst */
303 {6, 6, 6}, /* cost of loading fp registers
304 in SFmode, DFmode and XFmode */
305 {4, 4, 4}, /* cost of loading integer registers */
306 2, /* cost of moving MMX register */
307 {2, 2}, /* cost of loading MMX registers
308 in SImode and DImode */
309 {2, 2}, /* cost of storing MMX registers
310 in SImode and DImode */
311 2, /* cost of moving SSE register */
312 {2, 2, 8}, /* cost of loading SSE registers
313 in SImode, DImode and TImode */
314 {2, 2, 8}, /* cost of storing SSE registers
315 in SImode, DImode and TImode */
316 6, /* MMX or SSE register to integer */
317 32, /* size of prefetch block */
318 1, /* number of parallel prefetches */
320 2, /* cost of FADD and FSUB insns. */
321 2, /* cost of FMUL instruction. */
322 56, /* cost of FDIV instruction. */
323 2, /* cost of FABS instruction. */
324 2, /* cost of FCHS instruction. */
325 56, /* cost of FSQRT instruction. */
329 struct processor_costs athlon_cost = {
330 1, /* cost of an add instruction */
331 2, /* cost of a lea instruction */
332 1, /* variable shift costs */
333 1, /* constant shift costs */
334 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
335 0, /* cost of multiply per each bit set */
336 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
337 1, /* cost of movsx */
338 1, /* cost of movzx */
339 8, /* "large" insn */
341 4, /* cost for loading QImode using movzbl */
342 {3, 4, 3}, /* cost of loading integer registers
343 in QImode, HImode and SImode.
344 Relative to reg-reg move (2). */
345 {3, 4, 3}, /* cost of storing integer registers */
346 4, /* cost of reg,reg fld/fst */
347 {4, 4, 12}, /* cost of loading fp registers
348 in SFmode, DFmode and XFmode */
349 {6, 6, 8}, /* cost of loading integer registers */
350 2, /* cost of moving MMX register */
351 {4, 4}, /* cost of loading MMX registers
352 in SImode and DImode */
353 {4, 4}, /* cost of storing MMX registers
354 in SImode and DImode */
355 2, /* cost of moving SSE register */
356 {4, 4, 6}, /* cost of loading SSE registers
357 in SImode, DImode and TImode */
358 {4, 4, 5}, /* cost of storing SSE registers
359 in SImode, DImode and TImode */
360 5, /* MMX or SSE register to integer */
361 64, /* size of prefetch block */
362 6, /* number of parallel prefetches */
364 4, /* cost of FADD and FSUB insns. */
365 4, /* cost of FMUL instruction. */
366 24, /* cost of FDIV instruction. */
367 2, /* cost of FABS instruction. */
368 2, /* cost of FCHS instruction. */
369 35, /* cost of FSQRT instruction. */
373 struct processor_costs k8_cost = {
374 1, /* cost of an add instruction */
375 2, /* cost of a lea instruction */
376 1, /* variable shift costs */
377 1, /* constant shift costs */
378 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
379 0, /* cost of multiply per each bit set */
380 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
381 1, /* cost of movsx */
382 1, /* cost of movzx */
383 8, /* "large" insn */
385 4, /* cost for loading QImode using movzbl */
386 {3, 4, 3}, /* cost of loading integer registers
387 in QImode, HImode and SImode.
388 Relative to reg-reg move (2). */
389 {3, 4, 3}, /* cost of storing integer registers */
390 4, /* cost of reg,reg fld/fst */
391 {4, 4, 12}, /* cost of loading fp registers
392 in SFmode, DFmode and XFmode */
393 {6, 6, 8}, /* cost of loading integer registers */
394 2, /* cost of moving MMX register */
395 {3, 3}, /* cost of loading MMX registers
396 in SImode and DImode */
397 {4, 4}, /* cost of storing MMX registers
398 in SImode and DImode */
399 2, /* cost of moving SSE register */
400 {4, 3, 6}, /* cost of loading SSE registers
401 in SImode, DImode and TImode */
402 {4, 4, 5}, /* cost of storing SSE registers
403 in SImode, DImode and TImode */
404 5, /* MMX or SSE register to integer */
405 64, /* size of prefetch block */
406 6, /* number of parallel prefetches */
408 4, /* cost of FADD and FSUB insns. */
409 4, /* cost of FMUL instruction. */
410 19, /* cost of FDIV instruction. */
411 2, /* cost of FABS instruction. */
412 2, /* cost of FCHS instruction. */
413 35, /* cost of FSQRT instruction. */
417 struct processor_costs pentium4_cost = {
418 1, /* cost of an add instruction */
419 1, /* cost of a lea instruction */
420 4, /* variable shift costs */
421 4, /* constant shift costs */
422 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
423 0, /* cost of multiply per each bit set */
424 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
425 1, /* cost of movsx */
426 1, /* cost of movzx */
427 16, /* "large" insn */
429 2, /* cost for loading QImode using movzbl */
430 {4, 5, 4}, /* cost of loading integer registers
431 in QImode, HImode and SImode.
432 Relative to reg-reg move (2). */
433 {2, 3, 2}, /* cost of storing integer registers */
434 2, /* cost of reg,reg fld/fst */
435 {2, 2, 6}, /* cost of loading fp registers
436 in SFmode, DFmode and XFmode */
437 {4, 4, 6}, /* cost of loading integer registers */
438 2, /* cost of moving MMX register */
439 {2, 2}, /* cost of loading MMX registers
440 in SImode and DImode */
441 {2, 2}, /* cost of storing MMX registers
442 in SImode and DImode */
443 12, /* cost of moving SSE register */
444 {12, 12, 12}, /* cost of loading SSE registers
445 in SImode, DImode and TImode */
446 {2, 2, 8}, /* cost of storing SSE registers
447 in SImode, DImode and TImode */
448 10, /* MMX or SSE register to integer */
449 64, /* size of prefetch block */
450 6, /* number of parallel prefetches */
452 5, /* cost of FADD and FSUB insns. */
453 7, /* cost of FMUL instruction. */
454 43, /* cost of FDIV instruction. */
455 2, /* cost of FABS instruction. */
456 2, /* cost of FCHS instruction. */
457 43, /* cost of FSQRT instruction. */
460 const struct processor_costs *ix86_cost = &pentium_cost;
462 /* Processor feature/optimization bitmasks. */
463 #define m_386 (1<<PROCESSOR_I386)
464 #define m_486 (1<<PROCESSOR_I486)
465 #define m_PENT (1<<PROCESSOR_PENTIUM)
466 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
467 #define m_K6 (1<<PROCESSOR_K6)
468 #define m_ATHLON (1<<PROCESSOR_ATHLON)
469 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
470 #define m_K8 (1<<PROCESSOR_K8)
471 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
473 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
474 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
475 const int x86_zero_extend_with_and = m_486 | m_PENT;
476 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
477 const int x86_double_with_add = ~m_386;
478 const int x86_use_bit_test = m_386;
479 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
480 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
481 const int x86_3dnow_a = m_ATHLON_K8;
482 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
483 const int x86_branch_hints = m_PENT4;
484 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
485 const int x86_partial_reg_stall = m_PPRO;
486 const int x86_use_loop = m_K6;
487 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
488 const int x86_use_mov0 = m_K6;
489 const int x86_use_cltd = ~(m_PENT | m_K6);
490 const int x86_read_modify_write = ~m_PENT;
491 const int x86_read_modify = ~(m_PENT | m_PPRO);
492 const int x86_split_long_moves = m_PPRO;
493 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
494 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
495 const int x86_single_stringop = m_386 | m_PENT4;
496 const int x86_qimode_math = ~(0);
497 const int x86_promote_qi_regs = 0;
498 const int x86_himode_math = ~(m_PPRO);
499 const int x86_promote_hi_regs = m_PPRO;
500 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
501 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
502 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
503 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
504 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
505 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
506 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
507 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
508 const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
509 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
510 const int x86_decompose_lea = m_PENT4;
511 const int x86_shift1 = ~m_486;
512 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
513 const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
514 /* Set for machines where the type and dependencies are resolved on SSE register
515 parts instead of whole registers, so we may maintain just lower part of
516 scalar values in proper format leaving the upper part undefined. */
517 const int x86_sse_partial_regs = m_ATHLON_K8;
518 /* Athlon optimizes partial-register FPS special case, thus avoiding the
519 need for extra instructions beforehand */
520 const int x86_sse_partial_regs_for_cvtsd2ss = 0;
521 const int x86_sse_typeless_stores = m_ATHLON_K8;
522 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
523 const int x86_use_ffreep = m_ATHLON_K8;
524 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
525 const int x86_inter_unit_moves = ~(m_ATHLON_K8);
526 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_PPRO;
528 /* In case the average insn count for single function invocation is
529 lower than this constant, emit fast (but longer) prologue and
531 #define FAST_PROLOGUE_INSN_COUNT 20
533 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
534 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
535 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
536 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
538 /* Array of the smallest class containing reg number REGNO, indexed by
539 REGNO. Used by REGNO_REG_CLASS in i386.h. */
541 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
544 AREG, DREG, CREG, BREG,
546 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
548 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
549 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
552 /* flags, fpsr, dirflag, frame */
553 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
554 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
556 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
558 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
559 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
560 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
564 /* The "default" register map used in 32bit mode. */
566 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
568 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
569 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
570 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
571 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
572 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
573 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
574 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
577 static int const x86_64_int_parameter_registers[6] =
579 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
580 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
583 static int const x86_64_int_return_registers[4] =
585 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
588 /* The "default" register map used in 64bit mode. */
589 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
591 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
592 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
593 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
594 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
595 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
596 8,9,10,11,12,13,14,15, /* extended integer registers */
597 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
600 /* Define the register numbers to be used in Dwarf debugging information.
601 The SVR4 reference port C compiler uses the following register numbers
602 in its Dwarf output code:
603 0 for %eax (gcc regno = 0)
604 1 for %ecx (gcc regno = 2)
605 2 for %edx (gcc regno = 1)
606 3 for %ebx (gcc regno = 3)
607 4 for %esp (gcc regno = 7)
608 5 for %ebp (gcc regno = 6)
609 6 for %esi (gcc regno = 4)
610 7 for %edi (gcc regno = 5)
611 The following three DWARF register numbers are never generated by
612 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
613 believes these numbers have these meanings.
614 8 for %eip (no gcc equivalent)
615 9 for %eflags (gcc regno = 17)
616 10 for %trapno (no gcc equivalent)
617 It is not at all clear how we should number the FP stack registers
618 for the x86 architecture. If the version of SDB on x86/svr4 were
619 a bit less brain dead with respect to floating-point then we would
620 have a precedent to follow with respect to DWARF register numbers
621 for x86 FP registers, but the SDB on x86/svr4 is so completely
622 broken with respect to FP registers that it is hardly worth thinking
623 of it as something to strive for compatibility with.
624 The version of x86/svr4 SDB I have at the moment does (partially)
625 seem to believe that DWARF register number 11 is associated with
626 the x86 register %st(0), but that's about all. Higher DWARF
627 register numbers don't seem to be associated with anything in
628 particular, and even for DWARF regno 11, SDB only seems to under-
629 stand that it should say that a variable lives in %st(0) (when
630 asked via an `=' command) if we said it was in DWARF regno 11,
631 but SDB still prints garbage when asked for the value of the
632 variable in question (via a `/' command).
633 (Also note that the labels SDB prints for various FP stack regs
634 when doing an `x' command are all wrong.)
635 Note that these problems generally don't affect the native SVR4
636 C compiler because it doesn't allow the use of -O with -g and
637 because when it is *not* optimizing, it allocates a memory
638 location for each floating-point variable, and the memory
639 location is what gets described in the DWARF AT_location
640 attribute for the variable in question.
641 Regardless of the severe mental illness of the x86/svr4 SDB, we
642 do something sensible here and we use the following DWARF
643 register numbers. Note that these are all stack-top-relative
645 11 for %st(0) (gcc regno = 8)
646 12 for %st(1) (gcc regno = 9)
647 13 for %st(2) (gcc regno = 10)
648 14 for %st(3) (gcc regno = 11)
649 15 for %st(4) (gcc regno = 12)
650 16 for %st(5) (gcc regno = 13)
651 17 for %st(6) (gcc regno = 14)
652 18 for %st(7) (gcc regno = 15)
654 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
656 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
657 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
658 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
659 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
660 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
661 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
662 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
665 /* Test and compare insns in i386.md store the information needed to
666 generate branch and scc insns here. */
668 rtx ix86_compare_op0 = NULL_RTX;
669 rtx ix86_compare_op1 = NULL_RTX;
671 #define MAX_386_STACK_LOCALS 3
672 /* Size of the register save area. */
673 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
675 /* Define the structure for the machine field in struct function. */
677 struct stack_local_entry GTY(())
682 struct stack_local_entry *next;
685 /* Structure describing stack frame layout.
686 Stack grows downward:
692 saved frame pointer if frame_pointer_needed
693 <- HARD_FRAME_POINTER
699 > to_allocate <- FRAME_POINTER
711 int outgoing_arguments_size;
714 HOST_WIDE_INT to_allocate;
715 /* The offsets relative to ARG_POINTER. */
716 HOST_WIDE_INT frame_pointer_offset;
717 HOST_WIDE_INT hard_frame_pointer_offset;
718 HOST_WIDE_INT stack_pointer_offset;
720 /* When save_regs_using_mov is set, emit prologue using
721 move instead of push instructions. */
722 bool save_regs_using_mov;
725 /* Used to enable/disable debugging features. */
726 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
727 /* Code model option as passed by user. */
728 const char *ix86_cmodel_string;
730 enum cmodel ix86_cmodel;
732 const char *ix86_asm_string;
733 enum asm_dialect ix86_asm_dialect = ASM_ATT;
735 const char *ix86_tls_dialect_string;
736 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
738 /* Which unit we are generating floating point math for. */
739 enum fpmath_unit ix86_fpmath;
741 /* Which cpu are we scheduling for. */
742 enum processor_type ix86_tune;
743 /* Which instruction set architecture to use. */
744 enum processor_type ix86_arch;
746 /* Strings to hold which cpu and instruction set architecture to use. */
747 const char *ix86_tune_string; /* for -mtune=<xxx> */
748 const char *ix86_arch_string; /* for -march=<xxx> */
749 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
751 /* # of registers to use to pass arguments. */
752 const char *ix86_regparm_string;
754 /* true if sse prefetch instruction is not NOOP. */
755 int x86_prefetch_sse;
757 /* ix86_regparm_string as a number */
760 /* Alignment to use for loops and jumps: */
762 /* Power of two alignment for loops. */
763 const char *ix86_align_loops_string;
765 /* Power of two alignment for non-loop jumps. */
766 const char *ix86_align_jumps_string;
768 /* Power of two alignment for stack boundary in bytes. */
769 const char *ix86_preferred_stack_boundary_string;
771 /* Preferred alignment for stack boundary in bits. */
772 int ix86_preferred_stack_boundary;
774 /* Values 1-5: see jump.c */
775 int ix86_branch_cost;
776 const char *ix86_branch_cost_string;
778 /* Power of two alignment for functions. */
779 const char *ix86_align_funcs_string;
781 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
782 static char internal_label_prefix[16];
783 static int internal_label_prefix_len;
785 static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
786 static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
787 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
788 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
790 static const char *get_some_local_dynamic_name PARAMS ((void));
791 static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
792 static rtx maybe_get_pool_constant PARAMS ((rtx));
793 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
794 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
796 static rtx get_thread_pointer PARAMS ((int));
797 static rtx legitimize_tls_address PARAMS ((rtx, enum tls_model, int));
798 static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
799 static rtx gen_push PARAMS ((rtx));
800 static int memory_address_length PARAMS ((rtx addr));
801 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
802 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
803 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
804 static void ix86_dump_ppro_packet PARAMS ((FILE *));
805 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
806 static struct machine_function * ix86_init_machine_status PARAMS ((void));
807 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
808 static int ix86_nsaved_regs PARAMS ((void));
809 static void ix86_emit_save_regs PARAMS ((void));
810 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
811 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
812 static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
813 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
814 static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
815 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
816 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
817 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
818 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
819 static int ix86_issue_rate PARAMS ((void));
820 static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
821 static void ix86_sched_init PARAMS ((FILE *, int, int));
822 static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
823 static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
824 static int ia32_use_dfa_pipeline_interface PARAMS ((void));
825 static int ia32_multipass_dfa_lookahead PARAMS ((void));
826 static void ix86_init_mmx_sse_builtins PARAMS ((void));
827 static rtx x86_this_parameter PARAMS ((tree));
828 static void x86_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
829 HOST_WIDE_INT, tree));
830 static bool x86_can_output_mi_thunk PARAMS ((tree, HOST_WIDE_INT,
831 HOST_WIDE_INT, tree));
832 static void x86_file_start PARAMS ((void));
833 static void ix86_reorg PARAMS ((void));
834 bool ix86_expand_carry_flag_compare PARAMS ((enum rtx_code, rtx, rtx, rtx*));
838 rtx base, index, disp;
840 enum ix86_address_seg { SEG_DEFAULT, SEG_FS, SEG_GS } seg;
843 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
844 static int ix86_address_cost PARAMS ((rtx));
845 static bool ix86_cannot_force_const_mem PARAMS ((rtx));
846 static rtx ix86_delegitimize_address PARAMS ((rtx));
848 struct builtin_description;
849 static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
851 static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
853 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
854 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
855 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
856 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
857 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
858 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
859 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
863 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
865 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
866 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
867 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
868 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
869 static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
870 static int ix86_save_reg PARAMS ((unsigned int, int));
871 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
872 static int ix86_comp_type_attributes PARAMS ((tree, tree));
873 static int ix86_fntype_regparm PARAMS ((tree));
874 const struct attribute_spec ix86_attribute_table[];
875 static bool ix86_function_ok_for_sibcall PARAMS ((tree, tree));
876 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
877 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
878 static int ix86_value_regno PARAMS ((enum machine_mode));
879 static bool contains_128bit_aligned_vector_p PARAMS ((tree));
880 static bool ix86_ms_bitfield_layout_p PARAMS ((tree));
881 static tree ix86_handle_struct_attribute PARAMS ((tree *, tree, tree, int, bool *));
882 static int extended_reg_mentioned_1 PARAMS ((rtx *, void *));
883 static bool ix86_rtx_costs PARAMS ((rtx, int, int, int *));
884 static int min_insn_size PARAMS ((rtx));
885 static void k8_avoid_jump_misspredicts PARAMS ((void));
887 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
888 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
891 /* Register class used for passing given 64bit part of the argument.
892 These represent classes as documented by the PS ABI, with the exception
893 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
894 use SF or DFmode move instead of DImode to avoid reformatting penalties.
896 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
897 whenever possible (upper half does contain padding).
899 enum x86_64_reg_class
902 X86_64_INTEGER_CLASS,
903 X86_64_INTEGERSI_CLASS,
912 static const char * const x86_64_reg_class_name[] =
913 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
915 #define MAX_CLASSES 4
916 static int classify_argument PARAMS ((enum machine_mode, tree,
917 enum x86_64_reg_class [MAX_CLASSES],
919 static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
921 static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
923 static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
924 enum x86_64_reg_class));
926 /* Table of constants used by fldpi, fldln2, etc... */
927 static REAL_VALUE_TYPE ext_80387_constants_table [5];
928 static bool ext_80387_constants_init = 0;
929 static void init_ext_80387_constants PARAMS ((void));
931 /* Initialize the GCC target structure. */
932 #undef TARGET_ATTRIBUTE_TABLE
933 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
934 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
935 # undef TARGET_MERGE_DECL_ATTRIBUTES
936 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
939 #undef TARGET_COMP_TYPE_ATTRIBUTES
940 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
942 #undef TARGET_INIT_BUILTINS
943 #define TARGET_INIT_BUILTINS ix86_init_builtins
945 #undef TARGET_EXPAND_BUILTIN
946 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
948 #undef TARGET_ASM_FUNCTION_EPILOGUE
949 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
951 #undef TARGET_ASM_OPEN_PAREN
952 #define TARGET_ASM_OPEN_PAREN ""
953 #undef TARGET_ASM_CLOSE_PAREN
954 #define TARGET_ASM_CLOSE_PAREN ""
956 #undef TARGET_ASM_ALIGNED_HI_OP
957 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
958 #undef TARGET_ASM_ALIGNED_SI_OP
959 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
961 #undef TARGET_ASM_ALIGNED_DI_OP
962 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
965 #undef TARGET_ASM_UNALIGNED_HI_OP
966 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
967 #undef TARGET_ASM_UNALIGNED_SI_OP
968 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
969 #undef TARGET_ASM_UNALIGNED_DI_OP
970 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
972 #undef TARGET_SCHED_ADJUST_COST
973 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
974 #undef TARGET_SCHED_ISSUE_RATE
975 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
976 #undef TARGET_SCHED_VARIABLE_ISSUE
977 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
978 #undef TARGET_SCHED_INIT
979 #define TARGET_SCHED_INIT ix86_sched_init
980 #undef TARGET_SCHED_REORDER
981 #define TARGET_SCHED_REORDER ix86_sched_reorder
982 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
983 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
984 ia32_use_dfa_pipeline_interface
985 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
986 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
987 ia32_multipass_dfa_lookahead
989 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
990 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
993 #undef TARGET_HAVE_TLS
994 #define TARGET_HAVE_TLS true
996 #undef TARGET_CANNOT_FORCE_CONST_MEM
997 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
999 #undef TARGET_DELEGITIMIZE_ADDRESS
1000 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1002 #undef TARGET_MS_BITFIELD_LAYOUT_P
1003 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1005 #undef TARGET_ASM_OUTPUT_MI_THUNK
1006 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1007 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1008 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1010 #undef TARGET_ASM_FILE_START
1011 #define TARGET_ASM_FILE_START x86_file_start
1013 #undef TARGET_RTX_COSTS
1014 #define TARGET_RTX_COSTS ix86_rtx_costs
1015 #undef TARGET_ADDRESS_COST
1016 #define TARGET_ADDRESS_COST ix86_address_cost
1018 #undef TARGET_MACHINE_DEPENDENT_REORG
1019 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1021 struct gcc_target targetm = TARGET_INITIALIZER;
1023 /* The svr4 ABI for the i386 says that records and unions are returned
1025 #ifndef DEFAULT_PCC_STRUCT_RETURN
1026 #define DEFAULT_PCC_STRUCT_RETURN 1
1029 /* Sometimes certain combinations of command options do not make
1030 sense on a particular target machine. You can define a macro
1031 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1032 defined, is executed once just after all the command options have
1035 Don't use this macro to turn on various extra optimizations for
1036 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1042 /* Comes from final.c -- no real reason to change it. */
1043 #define MAX_CODE_ALIGN 16
1047 const struct processor_costs *cost; /* Processor costs */
1048 const int target_enable; /* Target flags to enable. */
1049 const int target_disable; /* Target flags to disable. */
1050 const int align_loop; /* Default alignments. */
1051 const int align_loop_max_skip;
1052 const int align_jump;
1053 const int align_jump_max_skip;
1054 const int align_func;
1056 const processor_target_table[PROCESSOR_max] =
1058 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1059 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1060 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1061 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1062 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1063 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1064 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1065 {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
1068 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1071 const char *const name; /* processor name or nickname. */
1072 const enum processor_type processor;
1073 const enum pta_flags
1078 PTA_PREFETCH_SSE = 8,
1084 const processor_alias_table[] =
1086 {"i386", PROCESSOR_I386, 0},
1087 {"i486", PROCESSOR_I486, 0},
1088 {"i586", PROCESSOR_PENTIUM, 0},
1089 {"pentium", PROCESSOR_PENTIUM, 0},
1090 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1091 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1092 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1093 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1094 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1095 {"i686", PROCESSOR_PENTIUMPRO, 0},
1096 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1097 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1098 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1099 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
1100 PTA_MMX | PTA_PREFETCH_SSE},
1101 {"k6", PROCESSOR_K6, PTA_MMX},
1102 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1103 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1104 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1106 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1107 | PTA_3DNOW | PTA_3DNOW_A},
1108 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1109 | PTA_3DNOW_A | PTA_SSE},
1110 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1111 | PTA_3DNOW_A | PTA_SSE},
1112 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1113 | PTA_3DNOW_A | PTA_SSE},
1114 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1115 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1118 int const pta_size = ARRAY_SIZE (processor_alias_table);
1120 /* By default our XFmode is the 80-bit extended format. If we have
1121 use TFmode instead, it's also the 80-bit format, but with padding. */
1122 real_format_for_mode[XFmode - QFmode] = &ieee_extended_intel_96_format;
1123 real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
1125 /* Set the default values for switches whose default depends on TARGET_64BIT
1126 in case they weren't overwritten by command line options. */
1129 if (flag_omit_frame_pointer == 2)
1130 flag_omit_frame_pointer = 1;
1131 if (flag_asynchronous_unwind_tables == 2)
1132 flag_asynchronous_unwind_tables = 1;
1133 if (flag_pcc_struct_return == 2)
1134 flag_pcc_struct_return = 0;
1138 if (flag_omit_frame_pointer == 2)
1139 flag_omit_frame_pointer = 0;
1140 if (flag_asynchronous_unwind_tables == 2)
1141 flag_asynchronous_unwind_tables = 0;
1142 if (flag_pcc_struct_return == 2)
1143 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1146 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1147 SUBTARGET_OVERRIDE_OPTIONS;
1150 if (!ix86_tune_string && ix86_arch_string)
1151 ix86_tune_string = ix86_arch_string;
1152 if (!ix86_tune_string)
1153 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1154 if (!ix86_arch_string)
1155 ix86_arch_string = TARGET_64BIT ? "k8" : "i386";
1157 if (ix86_cmodel_string != 0)
1159 if (!strcmp (ix86_cmodel_string, "small"))
1160 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1162 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1163 else if (!strcmp (ix86_cmodel_string, "32"))
1164 ix86_cmodel = CM_32;
1165 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1166 ix86_cmodel = CM_KERNEL;
1167 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1168 ix86_cmodel = CM_MEDIUM;
1169 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1170 ix86_cmodel = CM_LARGE;
1172 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1176 ix86_cmodel = CM_32;
1178 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1180 if (ix86_asm_string != 0)
1182 if (!strcmp (ix86_asm_string, "intel"))
1183 ix86_asm_dialect = ASM_INTEL;
1184 else if (!strcmp (ix86_asm_string, "att"))
1185 ix86_asm_dialect = ASM_ATT;
1187 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1189 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1190 error ("code model `%s' not supported in the %s bit mode",
1191 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1192 if (ix86_cmodel == CM_LARGE)
1193 sorry ("code model `large' not supported yet");
1194 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1195 sorry ("%i-bit mode not compiled in",
1196 (target_flags & MASK_64BIT) ? 64 : 32);
1198 for (i = 0; i < pta_size; i++)
1199 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1201 ix86_arch = processor_alias_table[i].processor;
1202 /* Default cpu tuning to the architecture. */
1203 ix86_tune = ix86_arch;
1204 if (processor_alias_table[i].flags & PTA_MMX
1205 && !(target_flags_explicit & MASK_MMX))
1206 target_flags |= MASK_MMX;
1207 if (processor_alias_table[i].flags & PTA_3DNOW
1208 && !(target_flags_explicit & MASK_3DNOW))
1209 target_flags |= MASK_3DNOW;
1210 if (processor_alias_table[i].flags & PTA_3DNOW_A
1211 && !(target_flags_explicit & MASK_3DNOW_A))
1212 target_flags |= MASK_3DNOW_A;
1213 if (processor_alias_table[i].flags & PTA_SSE
1214 && !(target_flags_explicit & MASK_SSE))
1215 target_flags |= MASK_SSE;
1216 if (processor_alias_table[i].flags & PTA_SSE2
1217 && !(target_flags_explicit & MASK_SSE2))
1218 target_flags |= MASK_SSE2;
1219 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1220 x86_prefetch_sse = true;
1221 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1222 error ("CPU you selected does not support x86-64 instruction set");
1227 error ("bad value (%s) for -march= switch", ix86_arch_string);
1229 for (i = 0; i < pta_size; i++)
1230 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1232 ix86_tune = processor_alias_table[i].processor;
1233 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1234 error ("CPU you selected does not support x86-64 instruction set");
1237 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1238 x86_prefetch_sse = true;
1240 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1243 ix86_cost = &size_cost;
1245 ix86_cost = processor_target_table[ix86_tune].cost;
1246 target_flags |= processor_target_table[ix86_tune].target_enable;
1247 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1249 /* Arrange to set up i386_stack_locals for all functions. */
1250 init_machine_status = ix86_init_machine_status;
1252 /* Validate -mregparm= value. */
1253 if (ix86_regparm_string)
1255 i = atoi (ix86_regparm_string);
1256 if (i < 0 || i > REGPARM_MAX)
1257 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1263 ix86_regparm = REGPARM_MAX;
1265 /* If the user has provided any of the -malign-* options,
1266 warn and use that value only if -falign-* is not set.
1267 Remove this code in GCC 3.2 or later. */
1268 if (ix86_align_loops_string)
1270 warning ("-malign-loops is obsolete, use -falign-loops");
1271 if (align_loops == 0)
1273 i = atoi (ix86_align_loops_string);
1274 if (i < 0 || i > MAX_CODE_ALIGN)
1275 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1277 align_loops = 1 << i;
1281 if (ix86_align_jumps_string)
1283 warning ("-malign-jumps is obsolete, use -falign-jumps");
1284 if (align_jumps == 0)
1286 i = atoi (ix86_align_jumps_string);
1287 if (i < 0 || i > MAX_CODE_ALIGN)
1288 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1290 align_jumps = 1 << i;
1294 if (ix86_align_funcs_string)
1296 warning ("-malign-functions is obsolete, use -falign-functions");
1297 if (align_functions == 0)
1299 i = atoi (ix86_align_funcs_string);
1300 if (i < 0 || i > MAX_CODE_ALIGN)
1301 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1303 align_functions = 1 << i;
1307 /* Default align_* from the processor table. */
1308 if (align_loops == 0)
1310 align_loops = processor_target_table[ix86_tune].align_loop;
1311 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1313 if (align_jumps == 0)
1315 align_jumps = processor_target_table[ix86_tune].align_jump;
1316 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1318 if (align_functions == 0)
1320 align_functions = processor_target_table[ix86_tune].align_func;
1323 /* Validate -mpreferred-stack-boundary= value, or provide default.
1324 The default of 128 bits is for Pentium III's SSE __m128, but we
1325 don't want additional code to keep the stack aligned when
1326 optimizing for code size. */
1327 ix86_preferred_stack_boundary = (optimize_size
1328 ? TARGET_64BIT ? 128 : 32
1330 if (ix86_preferred_stack_boundary_string)
1332 i = atoi (ix86_preferred_stack_boundary_string);
1333 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1334 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1335 TARGET_64BIT ? 4 : 2);
1337 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1340 /* Validate -mbranch-cost= value, or provide default. */
1341 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1342 if (ix86_branch_cost_string)
1344 i = atoi (ix86_branch_cost_string);
1346 error ("-mbranch-cost=%d is not between 0 and 5", i);
1348 ix86_branch_cost = i;
1351 if (ix86_tls_dialect_string)
1353 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1354 ix86_tls_dialect = TLS_DIALECT_GNU;
1355 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1356 ix86_tls_dialect = TLS_DIALECT_SUN;
1358 error ("bad value (%s) for -mtls-dialect= switch",
1359 ix86_tls_dialect_string);
1362 /* Keep nonleaf frame pointers. */
1363 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1364 flag_omit_frame_pointer = 1;
1366 /* If we're doing fast math, we don't care about comparison order
1367 wrt NaNs. This lets us use a shorter comparison sequence. */
1368 if (flag_unsafe_math_optimizations)
1369 target_flags &= ~MASK_IEEE_FP;
1371 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1372 since the insns won't need emulation. */
1373 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1374 target_flags &= ~MASK_NO_FANCY_MATH_387;
1376 /* Turn on SSE2 builtins for -mpni. */
1378 target_flags |= MASK_SSE2;
1380 /* Turn on SSE builtins for -msse2. */
1382 target_flags |= MASK_SSE;
1386 if (TARGET_ALIGN_DOUBLE)
1387 error ("-malign-double makes no sense in the 64bit mode");
1389 error ("-mrtd calling convention not supported in the 64bit mode");
1390 /* Enable by default the SSE and MMX builtins. */
1391 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1392 ix86_fpmath = FPMATH_SSE;
1396 ix86_fpmath = FPMATH_387;
1397 /* i386 ABI does not specify red zone. It still makes sense to use it
1398 when programmer takes care to stack from being destroyed. */
1399 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1400 target_flags |= MASK_NO_RED_ZONE;
1403 if (ix86_fpmath_string != 0)
1405 if (! strcmp (ix86_fpmath_string, "387"))
1406 ix86_fpmath = FPMATH_387;
1407 else if (! strcmp (ix86_fpmath_string, "sse"))
1411 warning ("SSE instruction set disabled, using 387 arithmetics");
1412 ix86_fpmath = FPMATH_387;
1415 ix86_fpmath = FPMATH_SSE;
1417 else if (! strcmp (ix86_fpmath_string, "387,sse")
1418 || ! strcmp (ix86_fpmath_string, "sse,387"))
1422 warning ("SSE instruction set disabled, using 387 arithmetics");
1423 ix86_fpmath = FPMATH_387;
1425 else if (!TARGET_80387)
1427 warning ("387 instruction set disabled, using SSE arithmetics");
1428 ix86_fpmath = FPMATH_SSE;
1431 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1434 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1437 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1441 target_flags |= MASK_MMX;
1442 x86_prefetch_sse = true;
1445 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1448 target_flags |= MASK_MMX;
1449 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1450 extensions it adds. */
1451 if (x86_3dnow_a & (1 << ix86_arch))
1452 target_flags |= MASK_3DNOW_A;
1454 if ((x86_accumulate_outgoing_args & TUNEMASK)
1455 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1457 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1459 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1462 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1463 p = strchr (internal_label_prefix, 'X');
1464 internal_label_prefix_len = p - internal_label_prefix;
1470 optimization_options (level, size)
1472 int size ATTRIBUTE_UNUSED;
1474 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1475 make the problem with not enough registers even worse. */
1476 #ifdef INSN_SCHEDULING
1478 flag_schedule_insns = 0;
1481 /* The default values of these switches depend on the TARGET_64BIT
1482 that is not known at this moment. Mark these values with 2 and
1483 let user the to override these. In case there is no command line option
1484 specifying them, we will set the defaults in override_options. */
1486 flag_omit_frame_pointer = 2;
1487 flag_pcc_struct_return = 2;
1488 flag_asynchronous_unwind_tables = 2;
1491 /* Table of valid machine attributes. */
1492 const struct attribute_spec ix86_attribute_table[] =
1494 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1495 /* Stdcall attribute says callee is responsible for popping arguments
1496 if they are not variable. */
1497 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1498 /* Fastcall attribute says callee is responsible for popping arguments
1499 if they are not variable. */
1500 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1501 /* Cdecl attribute says the callee is a normal C declaration */
1502 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1503 /* Regparm attribute specifies how many integer arguments are to be
1504 passed in registers. */
1505 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1506 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1507 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1508 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1509 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1511 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1512 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1513 { NULL, 0, 0, false, false, false, NULL }
1516 /* Decide whether we can make a sibling call to a function. DECL is the
1517 declaration of the function being targeted by the call and EXP is the
1518 CALL_EXPR representing the call. */
1521 ix86_function_ok_for_sibcall (decl, exp)
1525 /* If we are generating position-independent code, we cannot sibcall
1526 optimize any indirect call, or a direct call to a global function,
1527 as the PLT requires %ebx be live. */
1528 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1531 /* If we are returning floats on the 80387 register stack, we cannot
1532 make a sibcall from a function that doesn't return a float to a
1533 function that does or, conversely, from a function that does return
1534 a float to a function that doesn't; the necessary stack adjustment
1535 would not be executed. */
1536 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1537 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1540 /* If this call is indirect, we'll need to be able to use a call-clobbered
1541 register for the address of the target function. Make sure that all
1542 such registers are not used for passing parameters. */
1543 if (!decl && !TARGET_64BIT)
1545 int regparm = ix86_regparm;
1548 /* We're looking at the CALL_EXPR, we need the type of the function. */
1549 type = TREE_OPERAND (exp, 0); /* pointer expression */
1550 type = TREE_TYPE (type); /* pointer type */
1551 type = TREE_TYPE (type); /* function type */
1553 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1555 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1559 /* ??? Need to count the actual number of registers to be used,
1560 not the possible number of registers. Fix later. */
1565 /* Otherwise okay. That also includes certain types of indirect calls. */
1569 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1570 arguments as in struct attribute_spec.handler. */
1572 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1575 tree args ATTRIBUTE_UNUSED;
1576 int flags ATTRIBUTE_UNUSED;
1579 if (TREE_CODE (*node) != FUNCTION_TYPE
1580 && TREE_CODE (*node) != METHOD_TYPE
1581 && TREE_CODE (*node) != FIELD_DECL
1582 && TREE_CODE (*node) != TYPE_DECL)
1584 warning ("`%s' attribute only applies to functions",
1585 IDENTIFIER_POINTER (name));
1586 *no_add_attrs = true;
1590 if (is_attribute_p ("fastcall", name))
1592 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1594 error ("fastcall and stdcall attributes are not compatible");
1596 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1598 error ("fastcall and regparm attributes are not compatible");
1601 else if (is_attribute_p ("stdcall", name))
1603 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1605 error ("fastcall and stdcall attributes are not compatible");
1612 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1613 *no_add_attrs = true;
1619 /* Handle a "regparm" attribute;
1620 arguments as in struct attribute_spec.handler. */
1622 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1626 int flags ATTRIBUTE_UNUSED;
1629 if (TREE_CODE (*node) != FUNCTION_TYPE
1630 && TREE_CODE (*node) != METHOD_TYPE
1631 && TREE_CODE (*node) != FIELD_DECL
1632 && TREE_CODE (*node) != TYPE_DECL)
1634 warning ("`%s' attribute only applies to functions",
1635 IDENTIFIER_POINTER (name));
1636 *no_add_attrs = true;
1642 cst = TREE_VALUE (args);
1643 if (TREE_CODE (cst) != INTEGER_CST)
1645 warning ("`%s' attribute requires an integer constant argument",
1646 IDENTIFIER_POINTER (name));
1647 *no_add_attrs = true;
1649 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1651 warning ("argument to `%s' attribute larger than %d",
1652 IDENTIFIER_POINTER (name), REGPARM_MAX);
1653 *no_add_attrs = true;
1656 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1658 error ("fastcall and regparm attributes are not compatible");
1665 /* Return 0 if the attributes for two types are incompatible, 1 if they
1666 are compatible, and 2 if they are nearly compatible (which causes a
1667 warning to be generated). */
1670 ix86_comp_type_attributes (type1, type2)
1674 /* Check for mismatch of non-default calling convention. */
1675 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1677 if (TREE_CODE (type1) != FUNCTION_TYPE)
1680 /* Check for mismatched fastcall types */
1681 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1682 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1685 /* Check for mismatched return types (cdecl vs stdcall). */
1686 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1687 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1692 /* Return the regparm value for a fuctio with the indicated TYPE. */
1695 ix86_fntype_regparm (type)
1700 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1702 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1704 return ix86_regparm;
1707 /* Value is the number of bytes of arguments automatically
1708 popped when returning from a subroutine call.
1709 FUNDECL is the declaration node of the function (as a tree),
1710 FUNTYPE is the data type of the function (as a tree),
1711 or for a library call it is an identifier node for the subroutine name.
1712 SIZE is the number of bytes of arguments passed on the stack.
1714 On the 80386, the RTD insn may be used to pop them if the number
1715 of args is fixed, but if the number is variable then the caller
1716 must pop them all. RTD can't be used for library calls now
1717 because the library is compiled with the Unix compiler.
1718 Use of RTD is a selectable option, since it is incompatible with
1719 standard Unix calling sequences. If the option is not selected,
1720 the caller must always pop the args.
1722 The attribute stdcall is equivalent to RTD on a per module basis. */
1725 ix86_return_pops_args (fundecl, funtype, size)
1730 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1732 /* Cdecl functions override -mrtd, and never pop the stack. */
1733 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1735 /* Stdcall and fastcall functions will pop the stack if not variable args. */
1736 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1737 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1741 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1742 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1743 == void_type_node)))
1747 /* Lose any fake structure return argument if it is passed on the stack. */
1748 if (aggregate_value_p (TREE_TYPE (funtype))
1751 int nregs = ix86_fntype_regparm (funtype);
1754 return GET_MODE_SIZE (Pmode);
1760 /* Argument support functions. */
1762 /* Return true when register may be used to pass function parameters. */
1764 ix86_function_arg_regno_p (regno)
1769 return (regno < REGPARM_MAX
1770 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1771 if (SSE_REGNO_P (regno) && TARGET_SSE)
1773 /* RAX is used as hidden argument to va_arg functions. */
1776 for (i = 0; i < REGPARM_MAX; i++)
1777 if (regno == x86_64_int_parameter_registers[i])
1782 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1783 for a call to a function whose data type is FNTYPE.
1784 For a library call, FNTYPE is 0. */
1787 init_cumulative_args (cum, fntype, libname, fndecl)
1788 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1789 tree fntype; /* tree ptr for function decl */
1790 rtx libname; /* SYMBOL_REF of library name or 0 */
1793 static CUMULATIVE_ARGS zero_cum;
1794 tree param, next_param;
1795 bool user_convention = false;
1797 if (TARGET_DEBUG_ARG)
1799 fprintf (stderr, "\ninit_cumulative_args (");
1801 fprintf (stderr, "fntype code = %s, ret code = %s",
1802 tree_code_name[(int) TREE_CODE (fntype)],
1803 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1805 fprintf (stderr, "no fntype");
1808 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1813 /* Set up the number of registers to use for passing arguments. */
1814 cum->nregs = ix86_regparm;
1815 cum->sse_nregs = SSE_REGPARM_MAX;
1816 if (fntype && !TARGET_64BIT)
1818 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1822 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1823 user_convention = true;
1826 cum->maybe_vaarg = false;
1828 /* Use ecx and edx registers if function has fastcall attribute */
1829 if (fntype && !TARGET_64BIT)
1831 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1835 user_convention = true;
1839 /* Use register calling convention for local functions when possible. */
1840 if (!TARGET_64BIT && !user_convention && fndecl
1841 && flag_unit_at_a_time)
1843 struct cgraph_local_info *i = cgraph_local_info (fndecl);
1846 /* We can't use regparm(3) for nested functions as these use
1847 static chain pointer in third argument. */
1848 if (DECL_CONTEXT (fndecl) && !DECL_NO_STATIC_CHAIN (fndecl))
1856 /* Determine if this function has variable arguments. This is
1857 indicated by the last argument being 'void_type_mode' if there
1858 are no variable arguments. If there are variable arguments, then
1859 we won't pass anything in registers */
1863 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1864 param != 0; param = next_param)
1866 next_param = TREE_CHAIN (param);
1867 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1874 cum->maybe_vaarg = true;
1878 if ((!fntype && !libname)
1879 || (fntype && !TYPE_ARG_TYPES (fntype)))
1880 cum->maybe_vaarg = 1;
1882 if (TARGET_DEBUG_ARG)
1883 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1888 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
1889 of this code is to classify each 8bytes of incoming argument by the register
1890 class and assign registers accordingly. */
1892 /* Return the union class of CLASS1 and CLASS2.
1893 See the x86-64 PS ABI for details. */
1895 static enum x86_64_reg_class
1896 merge_classes (class1, class2)
1897 enum x86_64_reg_class class1, class2;
1899 /* Rule #1: If both classes are equal, this is the resulting class. */
1900 if (class1 == class2)
1903 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1905 if (class1 == X86_64_NO_CLASS)
1907 if (class2 == X86_64_NO_CLASS)
1910 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1911 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1912 return X86_64_MEMORY_CLASS;
1914 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1915 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1916 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1917 return X86_64_INTEGERSI_CLASS;
1918 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1919 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1920 return X86_64_INTEGER_CLASS;
1922 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1923 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1924 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1925 return X86_64_MEMORY_CLASS;
1927 /* Rule #6: Otherwise class SSE is used. */
1928 return X86_64_SSE_CLASS;
1931 /* Classify the argument of type TYPE and mode MODE.
1932 CLASSES will be filled by the register class used to pass each word
1933 of the operand. The number of words is returned. In case the parameter
1934 should be passed in memory, 0 is returned. As a special case for zero
1935 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1937 BIT_OFFSET is used internally for handling records and specifies offset
1938 of the offset in bits modulo 256 to avoid overflow cases.
1940 See the x86-64 PS ABI for details.
1944 classify_argument (mode, type, classes, bit_offset)
1945 enum machine_mode mode;
1947 enum x86_64_reg_class classes[MAX_CLASSES];
1951 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1952 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1954 /* Variable sized entities are always passed/returned in memory. */
1958 if (mode != VOIDmode
1959 && MUST_PASS_IN_STACK (mode, type))
1962 if (type && AGGREGATE_TYPE_P (type))
1966 enum x86_64_reg_class subclasses[MAX_CLASSES];
1968 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1972 for (i = 0; i < words; i++)
1973 classes[i] = X86_64_NO_CLASS;
1975 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1976 signalize memory class, so handle it as special case. */
1979 classes[0] = X86_64_NO_CLASS;
1983 /* Classify each field of record and merge classes. */
1984 if (TREE_CODE (type) == RECORD_TYPE)
1986 /* For classes first merge in the field of the subclasses. */
1987 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1989 tree bases = TYPE_BINFO_BASETYPES (type);
1990 int n_bases = TREE_VEC_LENGTH (bases);
1993 for (i = 0; i < n_bases; ++i)
1995 tree binfo = TREE_VEC_ELT (bases, i);
1997 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1998 tree type = BINFO_TYPE (binfo);
2000 num = classify_argument (TYPE_MODE (type),
2002 (offset + bit_offset) % 256);
2005 for (i = 0; i < num; i++)
2007 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2009 merge_classes (subclasses[i], classes[i + pos]);
2013 /* And now merge the fields of structure. */
2014 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2016 if (TREE_CODE (field) == FIELD_DECL)
2020 /* Bitfields are always classified as integer. Handle them
2021 early, since later code would consider them to be
2022 misaligned integers. */
2023 if (DECL_BIT_FIELD (field))
2025 for (i = int_bit_position (field) / 8 / 8;
2026 i < (int_bit_position (field)
2027 + tree_low_cst (DECL_SIZE (field), 0)
2030 merge_classes (X86_64_INTEGER_CLASS,
2035 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2036 TREE_TYPE (field), subclasses,
2037 (int_bit_position (field)
2038 + bit_offset) % 256);
2041 for (i = 0; i < num; i++)
2044 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2046 merge_classes (subclasses[i], classes[i + pos]);
2052 /* Arrays are handled as small records. */
2053 else if (TREE_CODE (type) == ARRAY_TYPE)
2056 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2057 TREE_TYPE (type), subclasses, bit_offset);
2061 /* The partial classes are now full classes. */
2062 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2063 subclasses[0] = X86_64_SSE_CLASS;
2064 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2065 subclasses[0] = X86_64_INTEGER_CLASS;
2067 for (i = 0; i < words; i++)
2068 classes[i] = subclasses[i % num];
2070 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2071 else if (TREE_CODE (type) == UNION_TYPE
2072 || TREE_CODE (type) == QUAL_UNION_TYPE)
2074 /* For classes first merge in the field of the subclasses. */
2075 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2077 tree bases = TYPE_BINFO_BASETYPES (type);
2078 int n_bases = TREE_VEC_LENGTH (bases);
2081 for (i = 0; i < n_bases; ++i)
2083 tree binfo = TREE_VEC_ELT (bases, i);
2085 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2086 tree type = BINFO_TYPE (binfo);
2088 num = classify_argument (TYPE_MODE (type),
2090 (offset + (bit_offset % 64)) % 256);
2093 for (i = 0; i < num; i++)
2095 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2097 merge_classes (subclasses[i], classes[i + pos]);
2101 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2103 if (TREE_CODE (field) == FIELD_DECL)
2106 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2107 TREE_TYPE (field), subclasses,
2111 for (i = 0; i < num; i++)
2112 classes[i] = merge_classes (subclasses[i], classes[i]);
2119 /* Final merger cleanup. */
2120 for (i = 0; i < words; i++)
2122 /* If one class is MEMORY, everything should be passed in
2124 if (classes[i] == X86_64_MEMORY_CLASS)
2127 /* The X86_64_SSEUP_CLASS should be always preceded by
2128 X86_64_SSE_CLASS. */
2129 if (classes[i] == X86_64_SSEUP_CLASS
2130 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2131 classes[i] = X86_64_SSE_CLASS;
2133 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2134 if (classes[i] == X86_64_X87UP_CLASS
2135 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2136 classes[i] = X86_64_SSE_CLASS;
2141 /* Compute alignment needed. We align all types to natural boundaries with
2142 exception of XFmode that is aligned to 64bits. */
2143 if (mode != VOIDmode && mode != BLKmode)
2145 int mode_alignment = GET_MODE_BITSIZE (mode);
2148 mode_alignment = 128;
2149 else if (mode == XCmode)
2150 mode_alignment = 256;
2151 /* Misaligned fields are always returned in memory. */
2152 if (bit_offset % mode_alignment)
2156 /* Classification of atomic types. */
2166 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2167 classes[0] = X86_64_INTEGERSI_CLASS;
2169 classes[0] = X86_64_INTEGER_CLASS;
2173 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2176 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2177 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2180 if (!(bit_offset % 64))
2181 classes[0] = X86_64_SSESF_CLASS;
2183 classes[0] = X86_64_SSE_CLASS;
2186 classes[0] = X86_64_SSEDF_CLASS;
2189 classes[0] = X86_64_X87_CLASS;
2190 classes[1] = X86_64_X87UP_CLASS;
2193 classes[0] = X86_64_X87_CLASS;
2194 classes[1] = X86_64_X87UP_CLASS;
2195 classes[2] = X86_64_X87_CLASS;
2196 classes[3] = X86_64_X87UP_CLASS;
2199 classes[0] = X86_64_SSEDF_CLASS;
2200 classes[1] = X86_64_SSEDF_CLASS;
2203 classes[0] = X86_64_SSE_CLASS;
2211 classes[0] = X86_64_SSE_CLASS;
2212 classes[1] = X86_64_SSEUP_CLASS;
2227 /* Examine the argument and return set number of register required in each
2228 class. Return 0 iff parameter should be passed in memory. */
2230 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
2231 enum machine_mode mode;
2233 int *int_nregs, *sse_nregs;
2236 enum x86_64_reg_class class[MAX_CLASSES];
2237 int n = classify_argument (mode, type, class, 0);
2243 for (n--; n >= 0; n--)
2246 case X86_64_INTEGER_CLASS:
2247 case X86_64_INTEGERSI_CLASS:
2250 case X86_64_SSE_CLASS:
2251 case X86_64_SSESF_CLASS:
2252 case X86_64_SSEDF_CLASS:
2255 case X86_64_NO_CLASS:
2256 case X86_64_SSEUP_CLASS:
2258 case X86_64_X87_CLASS:
2259 case X86_64_X87UP_CLASS:
2263 case X86_64_MEMORY_CLASS:
2268 /* Construct container for the argument used by GCC interface. See
2269 FUNCTION_ARG for the detailed description. */
2271 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
2272 enum machine_mode mode;
2275 int nintregs, nsseregs;
2279 enum machine_mode tmpmode;
2281 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2282 enum x86_64_reg_class class[MAX_CLASSES];
2286 int needed_sseregs, needed_intregs;
2287 rtx exp[MAX_CLASSES];
2290 n = classify_argument (mode, type, class, 0);
2291 if (TARGET_DEBUG_ARG)
2294 fprintf (stderr, "Memory class\n");
2297 fprintf (stderr, "Classes:");
2298 for (i = 0; i < n; i++)
2300 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2302 fprintf (stderr, "\n");
2307 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2309 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2312 /* First construct simple cases. Avoid SCmode, since we want to use
2313 single register to pass this type. */
2314 if (n == 1 && mode != SCmode)
2317 case X86_64_INTEGER_CLASS:
2318 case X86_64_INTEGERSI_CLASS:
2319 return gen_rtx_REG (mode, intreg[0]);
2320 case X86_64_SSE_CLASS:
2321 case X86_64_SSESF_CLASS:
2322 case X86_64_SSEDF_CLASS:
2323 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2324 case X86_64_X87_CLASS:
2325 return gen_rtx_REG (mode, FIRST_STACK_REG);
2326 case X86_64_NO_CLASS:
2327 /* Zero sized array, struct or class. */
2332 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
2333 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2335 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2336 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
2337 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2338 && class[1] == X86_64_INTEGER_CLASS
2339 && (mode == CDImode || mode == TImode)
2340 && intreg[0] + 1 == intreg[1])
2341 return gen_rtx_REG (mode, intreg[0]);
2343 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2344 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2345 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
2347 /* Otherwise figure out the entries of the PARALLEL. */
2348 for (i = 0; i < n; i++)
2352 case X86_64_NO_CLASS:
2354 case X86_64_INTEGER_CLASS:
2355 case X86_64_INTEGERSI_CLASS:
2356 /* Merge TImodes on aligned occasions here too. */
2357 if (i * 8 + 8 > bytes)
2358 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2359 else if (class[i] == X86_64_INTEGERSI_CLASS)
2363 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2364 if (tmpmode == BLKmode)
2366 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2367 gen_rtx_REG (tmpmode, *intreg),
2371 case X86_64_SSESF_CLASS:
2372 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2373 gen_rtx_REG (SFmode,
2374 SSE_REGNO (sse_regno)),
2378 case X86_64_SSEDF_CLASS:
2379 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2380 gen_rtx_REG (DFmode,
2381 SSE_REGNO (sse_regno)),
2385 case X86_64_SSE_CLASS:
2386 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2390 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2391 gen_rtx_REG (tmpmode,
2392 SSE_REGNO (sse_regno)),
2394 if (tmpmode == TImode)
2402 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2403 for (i = 0; i < nexps; i++)
2404 XVECEXP (ret, 0, i) = exp [i];
2408 /* Update the data in CUM to advance over an argument
2409 of mode MODE and data type TYPE.
2410 (TYPE is null for libcalls where that information may not be available.) */
2413 function_arg_advance (cum, mode, type, named)
2414 CUMULATIVE_ARGS *cum; /* current arg information */
2415 enum machine_mode mode; /* current arg mode */
2416 tree type; /* type of the argument or 0 if lib support */
2417 int named; /* whether or not the argument was named */
2420 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2421 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2423 if (TARGET_DEBUG_ARG)
2425 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2426 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2429 int int_nregs, sse_nregs;
2430 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2431 cum->words += words;
2432 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2434 cum->nregs -= int_nregs;
2435 cum->sse_nregs -= sse_nregs;
2436 cum->regno += int_nregs;
2437 cum->sse_regno += sse_nregs;
2440 cum->words += words;
2444 if (TARGET_SSE && mode == TImode)
2446 cum->sse_words += words;
2447 cum->sse_nregs -= 1;
2448 cum->sse_regno += 1;
2449 if (cum->sse_nregs <= 0)
2457 cum->words += words;
2458 cum->nregs -= words;
2459 cum->regno += words;
2461 if (cum->nregs <= 0)
2471 /* Define where to put the arguments to a function.
2472 Value is zero to push the argument on the stack,
2473 or a hard register in which to store the argument.
2475 MODE is the argument's machine mode.
2476 TYPE is the data type of the argument (as a tree).
2477 This is null for libcalls where that information may
2479 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2480 the preceding args and about the function being called.
2481 NAMED is nonzero if this argument is a named parameter
2482 (otherwise it is an extra parameter matching an ellipsis). */
2485 function_arg (cum, mode, type, named)
2486 CUMULATIVE_ARGS *cum; /* current arg information */
2487 enum machine_mode mode; /* current arg mode */
2488 tree type; /* type of the argument or 0 if lib support */
2489 int named; /* != 0 for normal args, == 0 for ... args */
2493 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2494 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2496 /* Handle a hidden AL argument containing number of registers for varargs
2497 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2499 if (mode == VOIDmode)
2502 return GEN_INT (cum->maybe_vaarg
2503 ? (cum->sse_nregs < 0
2511 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2512 &x86_64_int_parameter_registers [cum->regno],
2517 /* For now, pass fp/complex values on the stack. */
2529 if (words <= cum->nregs)
2531 int regno = cum->regno;
2533 /* Fastcall allocates the first two DWORD (SImode) or
2534 smaller arguments to ECX and EDX. */
2537 if (mode == BLKmode || mode == DImode)
2540 /* ECX not EAX is the first allocated register. */
2544 ret = gen_rtx_REG (mode, regno);
2549 ret = gen_rtx_REG (mode, cum->sse_regno);
2553 if (TARGET_DEBUG_ARG)
2556 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2557 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2560 print_simple_rtl (stderr, ret);
2562 fprintf (stderr, ", stack");
2564 fprintf (stderr, " )\n");
2570 /* A C expression that indicates when an argument must be passed by
2571 reference. If nonzero for an argument, a copy of that argument is
2572 made in memory and a pointer to the argument is passed instead of
2573 the argument itself. The pointer is passed in whatever way is
2574 appropriate for passing a pointer to that type. */
2577 function_arg_pass_by_reference (cum, mode, type, named)
2578 CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
2579 enum machine_mode mode ATTRIBUTE_UNUSED;
2581 int named ATTRIBUTE_UNUSED;
2586 if (type && int_size_in_bytes (type) == -1)
2588 if (TARGET_DEBUG_ARG)
2589 fprintf (stderr, "function_arg_pass_by_reference\n");
2596 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
2599 contains_128bit_aligned_vector_p (type)
2602 enum machine_mode mode = TYPE_MODE (type);
2603 if (SSE_REG_MODE_P (mode)
2604 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2606 if (TYPE_ALIGN (type) < 128)
2609 if (AGGREGATE_TYPE_P (type))
2611 /* Walk the agregates recursivly. */
2612 if (TREE_CODE (type) == RECORD_TYPE
2613 || TREE_CODE (type) == UNION_TYPE
2614 || TREE_CODE (type) == QUAL_UNION_TYPE)
2618 if (TYPE_BINFO (type) != NULL
2619 && TYPE_BINFO_BASETYPES (type) != NULL)
2621 tree bases = TYPE_BINFO_BASETYPES (type);
2622 int n_bases = TREE_VEC_LENGTH (bases);
2625 for (i = 0; i < n_bases; ++i)
2627 tree binfo = TREE_VEC_ELT (bases, i);
2628 tree type = BINFO_TYPE (binfo);
2630 if (contains_128bit_aligned_vector_p (type))
2634 /* And now merge the fields of structure. */
2635 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2637 if (TREE_CODE (field) == FIELD_DECL
2638 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2642 /* Just for use if some languages passes arrays by value. */
2643 else if (TREE_CODE (type) == ARRAY_TYPE)
2645 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2654 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2658 ix86_function_arg_boundary (mode, type)
2659 enum machine_mode mode;
2664 align = TYPE_ALIGN (type);
2666 align = GET_MODE_ALIGNMENT (mode);
2667 if (align < PARM_BOUNDARY)
2668 align = PARM_BOUNDARY;
2671 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2672 make an exception for SSE modes since these require 128bit
2675 The handling here differs from field_alignment. ICC aligns MMX
2676 arguments to 4 byte boundaries, while structure fields are aligned
2677 to 8 byte boundaries. */
2680 if (!SSE_REG_MODE_P (mode))
2681 align = PARM_BOUNDARY;
2685 if (!contains_128bit_aligned_vector_p (type))
2686 align = PARM_BOUNDARY;
2688 if (align != PARM_BOUNDARY && !TARGET_SSE)
2696 /* Return true if N is a possible register number of function value. */
2698 ix86_function_value_regno_p (regno)
2703 return ((regno) == 0
2704 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2705 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2707 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2708 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2709 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2712 /* Define how to find the value returned by a function.
2713 VALTYPE is the data type of the value (as a tree).
2714 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2715 otherwise, FUNC is 0. */
2717 ix86_function_value (valtype)
2722 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2723 REGPARM_MAX, SSE_REGPARM_MAX,
2724 x86_64_int_return_registers, 0);
2725 /* For zero sized structures, construct_container return NULL, but we need
2726 to keep rest of compiler happy by returning meaningful value. */
2728 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2732 return gen_rtx_REG (TYPE_MODE (valtype),
2733 ix86_value_regno (TYPE_MODE (valtype)));
2736 /* Return false iff type is returned in memory. */
2738 ix86_return_in_memory (type)
2741 int needed_intregs, needed_sseregs;
2744 return !examine_argument (TYPE_MODE (type), type, 1,
2745 &needed_intregs, &needed_sseregs);
2749 if (TYPE_MODE (type) == BLKmode)
2751 else if (MS_AGGREGATE_RETURN
2752 && AGGREGATE_TYPE_P (type)
2753 && int_size_in_bytes(type) <= 8)
2755 else if ((VECTOR_MODE_P (TYPE_MODE (type))
2756 && int_size_in_bytes (type) == 8)
2757 || (int_size_in_bytes (type) > 12
2758 && TYPE_MODE (type) != TImode
2759 && TYPE_MODE (type) != TFmode
2760 && !VECTOR_MODE_P (TYPE_MODE (type))))
2766 /* Define how to find the value returned by a library function
2767 assuming the value has mode MODE. */
2769 ix86_libcall_value (mode)
2770 enum machine_mode mode;
2780 return gen_rtx_REG (mode, FIRST_SSE_REG);
2783 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2785 return gen_rtx_REG (mode, 0);
2789 return gen_rtx_REG (mode, ix86_value_regno (mode));
2792 /* Given a mode, return the register to use for a return value. */
2795 ix86_value_regno (mode)
2796 enum machine_mode mode;
2798 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2799 return FIRST_FLOAT_REG;
2800 if (mode == TImode || VECTOR_MODE_P (mode))
2801 return FIRST_SSE_REG;
2805 /* Create the va_list data type. */
2808 ix86_build_va_list ()
2810 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2812 /* For i386 we use plain pointer to argument area. */
2814 return build_pointer_type (char_type_node);
2816 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2817 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2819 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2820 unsigned_type_node);
2821 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2822 unsigned_type_node);
2823 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2825 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2828 DECL_FIELD_CONTEXT (f_gpr) = record;
2829 DECL_FIELD_CONTEXT (f_fpr) = record;
2830 DECL_FIELD_CONTEXT (f_ovf) = record;
2831 DECL_FIELD_CONTEXT (f_sav) = record;
2833 TREE_CHAIN (record) = type_decl;
2834 TYPE_NAME (record) = type_decl;
2835 TYPE_FIELDS (record) = f_gpr;
2836 TREE_CHAIN (f_gpr) = f_fpr;
2837 TREE_CHAIN (f_fpr) = f_ovf;
2838 TREE_CHAIN (f_ovf) = f_sav;
2840 layout_type (record);
2842 /* The correct type is an array type of one element. */
2843 return build_array_type (record, build_index_type (size_zero_node));
2846 /* Perform any needed actions needed for a function that is receiving a
2847 variable number of arguments.
2851 MODE and TYPE are the mode and type of the current parameter.
2853 PRETEND_SIZE is a variable that should be set to the amount of stack
2854 that must be pushed by the prolog to pretend that our caller pushed
2857 Normally, this macro will push all remaining incoming registers on the
2858 stack and set PRETEND_SIZE to the length of the registers pushed. */
2861 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2862 CUMULATIVE_ARGS *cum;
2863 enum machine_mode mode;
2865 int *pretend_size ATTRIBUTE_UNUSED;
2869 CUMULATIVE_ARGS next_cum;
2870 rtx save_area = NULL_RTX, mem;
2883 /* Indicate to allocate space on the stack for varargs save area. */
2884 ix86_save_varrargs_registers = 1;
2886 cfun->stack_alignment_needed = 128;
2888 fntype = TREE_TYPE (current_function_decl);
2889 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2890 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2891 != void_type_node));
2893 /* For varargs, we do not want to skip the dummy va_dcl argument.
2894 For stdargs, we do want to skip the last named argument. */
2897 function_arg_advance (&next_cum, mode, type, 1);
2900 save_area = frame_pointer_rtx;
2902 set = get_varargs_alias_set ();
2904 for (i = next_cum.regno; i < ix86_regparm; i++)
2906 mem = gen_rtx_MEM (Pmode,
2907 plus_constant (save_area, i * UNITS_PER_WORD));
2908 set_mem_alias_set (mem, set);
2909 emit_move_insn (mem, gen_rtx_REG (Pmode,
2910 x86_64_int_parameter_registers[i]));
2913 if (next_cum.sse_nregs)
2915 /* Now emit code to save SSE registers. The AX parameter contains number
2916 of SSE parameter registers used to call this function. We use
2917 sse_prologue_save insn template that produces computed jump across
2918 SSE saves. We need some preparation work to get this working. */
2920 label = gen_label_rtx ();
2921 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2923 /* Compute address to jump to :
2924 label - 5*eax + nnamed_sse_arguments*5 */
2925 tmp_reg = gen_reg_rtx (Pmode);
2926 nsse_reg = gen_reg_rtx (Pmode);
2927 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2928 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2929 gen_rtx_MULT (Pmode, nsse_reg,
2931 if (next_cum.sse_regno)
2934 gen_rtx_CONST (DImode,
2935 gen_rtx_PLUS (DImode,
2937 GEN_INT (next_cum.sse_regno * 4))));
2939 emit_move_insn (nsse_reg, label_ref);
2940 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2942 /* Compute address of memory block we save into. We always use pointer
2943 pointing 127 bytes after first byte to store - this is needed to keep
2944 instruction size limited by 4 bytes. */
2945 tmp_reg = gen_reg_rtx (Pmode);
2946 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2947 plus_constant (save_area,
2948 8 * REGPARM_MAX + 127)));
2949 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2950 set_mem_alias_set (mem, set);
2951 set_mem_align (mem, BITS_PER_WORD);
2953 /* And finally do the dirty job! */
2954 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2955 GEN_INT (next_cum.sse_regno), label));
2960 /* Implement va_start. */
2963 ix86_va_start (valist, nextarg)
2967 HOST_WIDE_INT words, n_gpr, n_fpr;
2968 tree f_gpr, f_fpr, f_ovf, f_sav;
2969 tree gpr, fpr, ovf, sav, t;
2971 /* Only 64bit target needs something special. */
2974 std_expand_builtin_va_start (valist, nextarg);
2978 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2979 f_fpr = TREE_CHAIN (f_gpr);
2980 f_ovf = TREE_CHAIN (f_fpr);
2981 f_sav = TREE_CHAIN (f_ovf);
2983 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2984 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2985 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2986 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2987 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2989 /* Count number of gp and fp argument registers used. */
2990 words = current_function_args_info.words;
2991 n_gpr = current_function_args_info.regno;
2992 n_fpr = current_function_args_info.sse_regno;
2994 if (TARGET_DEBUG_ARG)
2995 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2996 (int) words, (int) n_gpr, (int) n_fpr);
2998 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2999 build_int_2 (n_gpr * 8, 0));
3000 TREE_SIDE_EFFECTS (t) = 1;
3001 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3003 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3004 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
3005 TREE_SIDE_EFFECTS (t) = 1;
3006 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3008 /* Find the overflow area. */
3009 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3011 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3012 build_int_2 (words * UNITS_PER_WORD, 0));
3013 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3014 TREE_SIDE_EFFECTS (t) = 1;
3015 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3017 /* Find the register save area.
3018 Prologue of the function save it right above stack frame. */
3019 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3020 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3021 TREE_SIDE_EFFECTS (t) = 1;
3022 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3025 /* Implement va_arg. */
3027 ix86_va_arg (valist, type)
3030 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3031 tree f_gpr, f_fpr, f_ovf, f_sav;
3032 tree gpr, fpr, ovf, sav, t;
3034 rtx lab_false, lab_over = NULL_RTX;
3039 /* Only 64bit target needs something special. */
3042 return std_expand_builtin_va_arg (valist, type);
3045 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3046 f_fpr = TREE_CHAIN (f_gpr);
3047 f_ovf = TREE_CHAIN (f_fpr);
3048 f_sav = TREE_CHAIN (f_ovf);
3050 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3051 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3052 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3053 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3054 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3056 size = int_size_in_bytes (type);
3059 /* Passed by reference. */
3061 type = build_pointer_type (type);
3062 size = int_size_in_bytes (type);
3064 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3066 container = construct_container (TYPE_MODE (type), type, 0,
3067 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3069 * Pull the value out of the saved registers ...
3072 addr_rtx = gen_reg_rtx (Pmode);
3076 rtx int_addr_rtx, sse_addr_rtx;
3077 int needed_intregs, needed_sseregs;
3080 lab_over = gen_label_rtx ();
3081 lab_false = gen_label_rtx ();
3083 examine_argument (TYPE_MODE (type), type, 0,
3084 &needed_intregs, &needed_sseregs);
3087 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
3088 || TYPE_ALIGN (type) > 128);
3090 /* In case we are passing structure, verify that it is consecutive block
3091 on the register save area. If not we need to do moves. */
3092 if (!need_temp && !REG_P (container))
3094 /* Verify that all registers are strictly consecutive */
3095 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3099 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3101 rtx slot = XVECEXP (container, 0, i);
3102 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3103 || INTVAL (XEXP (slot, 1)) != i * 16)
3111 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3113 rtx slot = XVECEXP (container, 0, i);
3114 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3115 || INTVAL (XEXP (slot, 1)) != i * 8)
3122 int_addr_rtx = addr_rtx;
3123 sse_addr_rtx = addr_rtx;
3127 int_addr_rtx = gen_reg_rtx (Pmode);
3128 sse_addr_rtx = gen_reg_rtx (Pmode);
3130 /* First ensure that we fit completely in registers. */
3133 emit_cmp_and_jump_insns (expand_expr
3134 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
3135 GEN_INT ((REGPARM_MAX - needed_intregs +
3136 1) * 8), GE, const1_rtx, SImode,
3141 emit_cmp_and_jump_insns (expand_expr
3142 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
3143 GEN_INT ((SSE_REGPARM_MAX -
3144 needed_sseregs + 1) * 16 +
3145 REGPARM_MAX * 8), GE, const1_rtx,
3146 SImode, 1, lab_false);
3149 /* Compute index to start of area used for integer regs. */
3152 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
3153 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
3154 if (r != int_addr_rtx)
3155 emit_move_insn (int_addr_rtx, r);
3159 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
3160 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
3161 if (r != sse_addr_rtx)
3162 emit_move_insn (sse_addr_rtx, r);
3170 /* Never use the memory itself, as it has the alias set. */
3171 x = XEXP (assign_temp (type, 0, 1, 0), 0);
3172 mem = gen_rtx_MEM (BLKmode, x);
3173 force_operand (x, addr_rtx);
3174 set_mem_alias_set (mem, get_varargs_alias_set ());
3175 set_mem_align (mem, BITS_PER_UNIT);
3177 for (i = 0; i < XVECLEN (container, 0); i++)
3179 rtx slot = XVECEXP (container, 0, i);
3180 rtx reg = XEXP (slot, 0);
3181 enum machine_mode mode = GET_MODE (reg);
3187 if (SSE_REGNO_P (REGNO (reg)))
3189 src_addr = sse_addr_rtx;
3190 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3194 src_addr = int_addr_rtx;
3195 src_offset = REGNO (reg) * 8;
3197 src_mem = gen_rtx_MEM (mode, src_addr);
3198 set_mem_alias_set (src_mem, get_varargs_alias_set ());
3199 src_mem = adjust_address (src_mem, mode, src_offset);
3200 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
3201 emit_move_insn (dest_mem, src_mem);
3208 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3209 build_int_2 (needed_intregs * 8, 0));
3210 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3211 TREE_SIDE_EFFECTS (t) = 1;
3212 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3217 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3218 build_int_2 (needed_sseregs * 16, 0));
3219 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3220 TREE_SIDE_EFFECTS (t) = 1;
3221 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3224 emit_jump_insn (gen_jump (lab_over));
3226 emit_label (lab_false);
3229 /* ... otherwise out of the overflow area. */
3231 /* Care for on-stack alignment if needed. */
3232 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3236 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3237 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3238 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3242 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3244 emit_move_insn (addr_rtx, r);
3247 build (PLUS_EXPR, TREE_TYPE (t), t,
3248 build_int_2 (rsize * UNITS_PER_WORD, 0));
3249 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3250 TREE_SIDE_EFFECTS (t) = 1;
3251 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3254 emit_label (lab_over);
3258 r = gen_rtx_MEM (Pmode, addr_rtx);
3259 set_mem_alias_set (r, get_varargs_alias_set ());
3260 emit_move_insn (addr_rtx, r);
3266 /* Return nonzero if OP is either a i387 or SSE fp register. */
3268 any_fp_register_operand (op, mode)
3270 enum machine_mode mode ATTRIBUTE_UNUSED;
3272 return ANY_FP_REG_P (op);
3275 /* Return nonzero if OP is an i387 fp register. */
3277 fp_register_operand (op, mode)
3279 enum machine_mode mode ATTRIBUTE_UNUSED;
3281 return FP_REG_P (op);
3284 /* Return nonzero if OP is a non-fp register_operand. */
3286 register_and_not_any_fp_reg_operand (op, mode)
3288 enum machine_mode mode;
3290 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3293 /* Return nonzero if OP is a register operand other than an
3294 i387 fp register. */
3296 register_and_not_fp_reg_operand (op, mode)
3298 enum machine_mode mode;
3300 return register_operand (op, mode) && !FP_REG_P (op);
3303 /* Return nonzero if OP is general operand representable on x86_64. */
3306 x86_64_general_operand (op, mode)
3308 enum machine_mode mode;
3311 return general_operand (op, mode);
3312 if (nonimmediate_operand (op, mode))
3314 return x86_64_sign_extended_value (op);
3317 /* Return nonzero if OP is general operand representable on x86_64
3318 as either sign extended or zero extended constant. */
3321 x86_64_szext_general_operand (op, mode)
3323 enum machine_mode mode;
3326 return general_operand (op, mode);
3327 if (nonimmediate_operand (op, mode))
3329 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3332 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3335 x86_64_nonmemory_operand (op, mode)
3337 enum machine_mode mode;
3340 return nonmemory_operand (op, mode);
3341 if (register_operand (op, mode))
3343 return x86_64_sign_extended_value (op);
3346 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
3349 x86_64_movabs_operand (op, mode)
3351 enum machine_mode mode;
3353 if (!TARGET_64BIT || !flag_pic)
3354 return nonmemory_operand (op, mode);
3355 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3357 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3362 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3365 x86_64_szext_nonmemory_operand (op, mode)
3367 enum machine_mode mode;
3370 return nonmemory_operand (op, mode);
3371 if (register_operand (op, mode))
3373 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3376 /* Return nonzero if OP is immediate operand representable on x86_64. */
3379 x86_64_immediate_operand (op, mode)
3381 enum machine_mode mode;
3384 return immediate_operand (op, mode);
3385 return x86_64_sign_extended_value (op);
3388 /* Return nonzero if OP is immediate operand representable on x86_64. */
3391 x86_64_zext_immediate_operand (op, mode)
3393 enum machine_mode mode ATTRIBUTE_UNUSED;
3395 return x86_64_zero_extended_value (op);
3398 /* Return nonzero if OP is (const_int 1), else return zero. */
3401 const_int_1_operand (op, mode)
3403 enum machine_mode mode ATTRIBUTE_UNUSED;
3405 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
3408 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3409 for shift & compare patterns, as shifting by 0 does not change flags),
3410 else return zero. */
3413 const_int_1_31_operand (op, mode)
3415 enum machine_mode mode ATTRIBUTE_UNUSED;
3417 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3420 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3421 reference and a constant. */
3424 symbolic_operand (op, mode)
3426 enum machine_mode mode ATTRIBUTE_UNUSED;
3428 switch (GET_CODE (op))
3436 if (GET_CODE (op) == SYMBOL_REF
3437 || GET_CODE (op) == LABEL_REF
3438 || (GET_CODE (op) == UNSPEC
3439 && (XINT (op, 1) == UNSPEC_GOT
3440 || XINT (op, 1) == UNSPEC_GOTOFF
3441 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3443 if (GET_CODE (op) != PLUS
3444 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3448 if (GET_CODE (op) == SYMBOL_REF
3449 || GET_CODE (op) == LABEL_REF)
3451 /* Only @GOTOFF gets offsets. */
3452 if (GET_CODE (op) != UNSPEC
3453 || XINT (op, 1) != UNSPEC_GOTOFF)
3456 op = XVECEXP (op, 0, 0);
3457 if (GET_CODE (op) == SYMBOL_REF
3458 || GET_CODE (op) == LABEL_REF)
3467 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3470 pic_symbolic_operand (op, mode)
3472 enum machine_mode mode ATTRIBUTE_UNUSED;
3474 if (GET_CODE (op) != CONST)
3479 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
3484 if (GET_CODE (op) == UNSPEC)
3486 if (GET_CODE (op) != PLUS
3487 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3490 if (GET_CODE (op) == UNSPEC)
3496 /* Return true if OP is a symbolic operand that resolves locally. */
3499 local_symbolic_operand (op, mode)
3501 enum machine_mode mode ATTRIBUTE_UNUSED;
3503 if (GET_CODE (op) == CONST
3504 && GET_CODE (XEXP (op, 0)) == PLUS
3505 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3506 op = XEXP (XEXP (op, 0), 0);
3508 if (GET_CODE (op) == LABEL_REF)
3511 if (GET_CODE (op) != SYMBOL_REF)
3514 if (SYMBOL_REF_LOCAL_P (op))
3517 /* There is, however, a not insubstantial body of code in the rest of
3518 the compiler that assumes it can just stick the results of
3519 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3520 /* ??? This is a hack. Should update the body of the compiler to
3521 always create a DECL an invoke targetm.encode_section_info. */
3522 if (strncmp (XSTR (op, 0), internal_label_prefix,
3523 internal_label_prefix_len) == 0)
3529 /* Test for various thread-local symbols. */
3532 tls_symbolic_operand (op, mode)
3534 enum machine_mode mode ATTRIBUTE_UNUSED;
3536 if (GET_CODE (op) != SYMBOL_REF)
3538 return SYMBOL_REF_TLS_MODEL (op);
3542 tls_symbolic_operand_1 (op, kind)
3544 enum tls_model kind;
3546 if (GET_CODE (op) != SYMBOL_REF)
3548 return SYMBOL_REF_TLS_MODEL (op) == kind;
3552 global_dynamic_symbolic_operand (op, mode)
3554 enum machine_mode mode ATTRIBUTE_UNUSED;
3556 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3560 local_dynamic_symbolic_operand (op, mode)
3562 enum machine_mode mode ATTRIBUTE_UNUSED;
3564 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3568 initial_exec_symbolic_operand (op, mode)
3570 enum machine_mode mode ATTRIBUTE_UNUSED;
3572 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3576 local_exec_symbolic_operand (op, mode)
3578 enum machine_mode mode ATTRIBUTE_UNUSED;
3580 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3583 /* Test for a valid operand for a call instruction. Don't allow the
3584 arg pointer register or virtual regs since they may decay into
3585 reg + const, which the patterns can't handle. */
3588 call_insn_operand (op, mode)
3590 enum machine_mode mode ATTRIBUTE_UNUSED;
3592 /* Disallow indirect through a virtual register. This leads to
3593 compiler aborts when trying to eliminate them. */
3594 if (GET_CODE (op) == REG
3595 && (op == arg_pointer_rtx
3596 || op == frame_pointer_rtx
3597 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3598 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3601 /* Disallow `call 1234'. Due to varying assembler lameness this
3602 gets either rejected or translated to `call .+1234'. */
3603 if (GET_CODE (op) == CONST_INT)
3606 /* Explicitly allow SYMBOL_REF even if pic. */
3607 if (GET_CODE (op) == SYMBOL_REF)
3610 /* Otherwise we can allow any general_operand in the address. */
3611 return general_operand (op, Pmode);
3614 /* Test for a valid operand for a call instruction. Don't allow the
3615 arg pointer register or virtual regs since they may decay into
3616 reg + const, which the patterns can't handle. */
3619 sibcall_insn_operand (op, mode)
3621 enum machine_mode mode ATTRIBUTE_UNUSED;
3623 /* Disallow indirect through a virtual register. This leads to
3624 compiler aborts when trying to eliminate them. */
3625 if (GET_CODE (op) == REG
3626 && (op == arg_pointer_rtx
3627 || op == frame_pointer_rtx
3628 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3629 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3632 /* Explicitly allow SYMBOL_REF even if pic. */
3633 if (GET_CODE (op) == SYMBOL_REF)
3636 /* Otherwise we can only allow register operands. */
3637 return register_operand (op, Pmode);
3641 constant_call_address_operand (op, mode)
3643 enum machine_mode mode ATTRIBUTE_UNUSED;
3645 if (GET_CODE (op) == CONST
3646 && GET_CODE (XEXP (op, 0)) == PLUS
3647 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3648 op = XEXP (XEXP (op, 0), 0);
3649 return GET_CODE (op) == SYMBOL_REF;
3652 /* Match exactly zero and one. */
3655 const0_operand (op, mode)
3657 enum machine_mode mode;
3659 return op == CONST0_RTX (mode);
3663 const1_operand (op, mode)
3665 enum machine_mode mode ATTRIBUTE_UNUSED;
3667 return op == const1_rtx;
3670 /* Match 2, 4, or 8. Used for leal multiplicands. */
3673 const248_operand (op, mode)
3675 enum machine_mode mode ATTRIBUTE_UNUSED;
3677 return (GET_CODE (op) == CONST_INT
3678 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3681 /* True if this is a constant appropriate for an increment or decrement. */
3684 incdec_operand (op, mode)
3686 enum machine_mode mode ATTRIBUTE_UNUSED;
3688 /* On Pentium4, the inc and dec operations causes extra dependency on flag
3689 registers, since carry flag is not set. */
3690 if (TARGET_PENTIUM4 && !optimize_size)
3692 return op == const1_rtx || op == constm1_rtx;
3695 /* Return nonzero if OP is acceptable as operand of DImode shift
3699 shiftdi_operand (op, mode)
3701 enum machine_mode mode ATTRIBUTE_UNUSED;
3704 return nonimmediate_operand (op, mode);
3706 return register_operand (op, mode);
3709 /* Return false if this is the stack pointer, or any other fake
3710 register eliminable to the stack pointer. Otherwise, this is
3713 This is used to prevent esp from being used as an index reg.
3714 Which would only happen in pathological cases. */
3717 reg_no_sp_operand (op, mode)
3719 enum machine_mode mode;
3722 if (GET_CODE (t) == SUBREG)
3724 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3727 return register_operand (op, mode);
3731 mmx_reg_operand (op, mode)
3733 enum machine_mode mode ATTRIBUTE_UNUSED;
3735 return MMX_REG_P (op);
3738 /* Return false if this is any eliminable register. Otherwise
3742 general_no_elim_operand (op, mode)
3744 enum machine_mode mode;
3747 if (GET_CODE (t) == SUBREG)
3749 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3750 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3751 || t == virtual_stack_dynamic_rtx)
3754 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3755 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3758 return general_operand (op, mode);
3761 /* Return false if this is any eliminable register. Otherwise
3762 register_operand or const_int. */
3765 nonmemory_no_elim_operand (op, mode)
3767 enum machine_mode mode;
3770 if (GET_CODE (t) == SUBREG)
3772 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3773 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3774 || t == virtual_stack_dynamic_rtx)
3777 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3780 /* Return false if this is any eliminable register or stack register,
3781 otherwise work like register_operand. */
3784 index_register_operand (op, mode)
3786 enum machine_mode mode;
3789 if (GET_CODE (t) == SUBREG)
3793 if (t == arg_pointer_rtx
3794 || t == frame_pointer_rtx
3795 || t == virtual_incoming_args_rtx
3796 || t == virtual_stack_vars_rtx
3797 || t == virtual_stack_dynamic_rtx
3798 || REGNO (t) == STACK_POINTER_REGNUM)
3801 return general_operand (op, mode);
3804 /* Return true if op is a Q_REGS class register. */
3807 q_regs_operand (op, mode)
3809 enum machine_mode mode;
3811 if (mode != VOIDmode && GET_MODE (op) != mode)
3813 if (GET_CODE (op) == SUBREG)
3814 op = SUBREG_REG (op);
3815 return ANY_QI_REG_P (op);
3818 /* Return true if op is an flags register. */
3821 flags_reg_operand (op, mode)
3823 enum machine_mode mode;
3825 if (mode != VOIDmode && GET_MODE (op) != mode)
3827 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3830 /* Return true if op is a NON_Q_REGS class register. */
3833 non_q_regs_operand (op, mode)
3835 enum machine_mode mode;
3837 if (mode != VOIDmode && GET_MODE (op) != mode)
3839 if (GET_CODE (op) == SUBREG)
3840 op = SUBREG_REG (op);
3841 return NON_QI_REG_P (op);
3845 zero_extended_scalar_load_operand (op, mode)
3847 enum machine_mode mode ATTRIBUTE_UNUSED;
3850 if (GET_CODE (op) != MEM)
3852 op = maybe_get_pool_constant (op);
3855 if (GET_CODE (op) != CONST_VECTOR)
3858 (GET_MODE_SIZE (GET_MODE (op)) /
3859 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
3860 for (n_elts--; n_elts > 0; n_elts--)
3862 rtx elt = CONST_VECTOR_ELT (op, n_elts);
3863 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
3869 /* Return 1 when OP is operand acceptable for standard SSE move. */
3871 vector_move_operand (op, mode)
3873 enum machine_mode mode;
3875 if (nonimmediate_operand (op, mode))
3877 if (GET_MODE (op) != mode && mode != VOIDmode)
3879 return (op == CONST0_RTX (GET_MODE (op)));
3882 /* Return true if op if a valid address, and does not contain
3883 a segment override. */
3886 no_seg_address_operand (op, mode)
3888 enum machine_mode mode;
3890 struct ix86_address parts;
3892 if (! address_operand (op, mode))
3895 if (! ix86_decompose_address (op, &parts))
3898 return parts.seg == SEG_DEFAULT;
3901 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3904 sse_comparison_operator (op, mode)
3906 enum machine_mode mode ATTRIBUTE_UNUSED;
3908 enum rtx_code code = GET_CODE (op);
3911 /* Operations supported directly. */
3921 /* These are equivalent to ones above in non-IEEE comparisons. */
3928 return !TARGET_IEEE_FP;
3933 /* Return 1 if OP is a valid comparison operator in valid mode. */
3935 ix86_comparison_operator (op, mode)
3937 enum machine_mode mode;
3939 enum machine_mode inmode;
3940 enum rtx_code code = GET_CODE (op);
3941 if (mode != VOIDmode && GET_MODE (op) != mode)
3943 if (GET_RTX_CLASS (code) != '<')
3945 inmode = GET_MODE (XEXP (op, 0));
3947 if (inmode == CCFPmode || inmode == CCFPUmode)
3949 enum rtx_code second_code, bypass_code;
3950 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3951 return (bypass_code == NIL && second_code == NIL);
3958 if (inmode == CCmode || inmode == CCGCmode
3959 || inmode == CCGOCmode || inmode == CCNOmode)
3962 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3963 if (inmode == CCmode)
3967 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3975 /* Return 1 if OP is a valid comparison operator testing carry flag
3978 ix86_carry_flag_operator (op, mode)
3980 enum machine_mode mode;
3982 enum machine_mode inmode;
3983 enum rtx_code code = GET_CODE (op);
3985 if (mode != VOIDmode && GET_MODE (op) != mode)
3987 if (GET_RTX_CLASS (code) != '<')
3989 inmode = GET_MODE (XEXP (op, 0));
3990 if (GET_CODE (XEXP (op, 0)) != REG
3991 || REGNO (XEXP (op, 0)) != 17
3992 || XEXP (op, 1) != const0_rtx)
3995 if (inmode == CCFPmode || inmode == CCFPUmode)
3997 enum rtx_code second_code, bypass_code;
3999 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4000 if (bypass_code != NIL || second_code != NIL)
4002 code = ix86_fp_compare_code_to_integer (code);
4004 else if (inmode != CCmode)
4009 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
4012 fcmov_comparison_operator (op, mode)
4014 enum machine_mode mode;
4016 enum machine_mode inmode;
4017 enum rtx_code code = GET_CODE (op);
4019 if (mode != VOIDmode && GET_MODE (op) != mode)
4021 if (GET_RTX_CLASS (code) != '<')
4023 inmode = GET_MODE (XEXP (op, 0));
4024 if (inmode == CCFPmode || inmode == CCFPUmode)
4026 enum rtx_code second_code, bypass_code;
4028 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4029 if (bypass_code != NIL || second_code != NIL)
4031 code = ix86_fp_compare_code_to_integer (code);
4033 /* i387 supports just limited amount of conditional codes. */
4036 case LTU: case GTU: case LEU: case GEU:
4037 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
4040 case ORDERED: case UNORDERED:
4048 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
4051 promotable_binary_operator (op, mode)
4053 enum machine_mode mode ATTRIBUTE_UNUSED;
4055 switch (GET_CODE (op))
4058 /* Modern CPUs have same latency for HImode and SImode multiply,
4059 but 386 and 486 do HImode multiply faster. */
4060 return ix86_tune > PROCESSOR_I486;
4072 /* Nearly general operand, but accept any const_double, since we wish
4073 to be able to drop them into memory rather than have them get pulled
4077 cmp_fp_expander_operand (op, mode)
4079 enum machine_mode mode;
4081 if (mode != VOIDmode && mode != GET_MODE (op))
4083 if (GET_CODE (op) == CONST_DOUBLE)
4085 return general_operand (op, mode);
4088 /* Match an SI or HImode register for a zero_extract. */
4091 ext_register_operand (op, mode)
4093 enum machine_mode mode ATTRIBUTE_UNUSED;
4096 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
4097 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
4100 if (!register_operand (op, VOIDmode))
4103 /* Be careful to accept only registers having upper parts. */
4104 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
4105 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
4108 /* Return 1 if this is a valid binary floating-point operation.
4109 OP is the expression matched, and MODE is its mode. */
4112 binary_fp_operator (op, mode)
4114 enum machine_mode mode;
4116 if (mode != VOIDmode && mode != GET_MODE (op))
4119 switch (GET_CODE (op))
4125 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
4133 mult_operator (op, mode)
4135 enum machine_mode mode ATTRIBUTE_UNUSED;
4137 return GET_CODE (op) == MULT;
4141 div_operator (op, mode)
4143 enum machine_mode mode ATTRIBUTE_UNUSED;
4145 return GET_CODE (op) == DIV;
4149 arith_or_logical_operator (op, mode)
4151 enum machine_mode mode;
4153 return ((mode == VOIDmode || GET_MODE (op) == mode)
4154 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
4155 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
4158 /* Returns 1 if OP is memory operand with a displacement. */
4161 memory_displacement_operand (op, mode)
4163 enum machine_mode mode;
4165 struct ix86_address parts;
4167 if (! memory_operand (op, mode))
4170 if (! ix86_decompose_address (XEXP (op, 0), &parts))
4173 return parts.disp != NULL_RTX;
4176 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
4177 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4179 ??? It seems likely that this will only work because cmpsi is an
4180 expander, and no actual insns use this. */
4183 cmpsi_operand (op, mode)
4185 enum machine_mode mode;
4187 if (nonimmediate_operand (op, mode))
4190 if (GET_CODE (op) == AND
4191 && GET_MODE (op) == SImode
4192 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
4193 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
4194 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
4195 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
4196 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
4197 && GET_CODE (XEXP (op, 1)) == CONST_INT)
4203 /* Returns 1 if OP is memory operand that can not be represented by the
4207 long_memory_operand (op, mode)
4209 enum machine_mode mode;
4211 if (! memory_operand (op, mode))
4214 return memory_address_length (op) != 0;
4217 /* Return nonzero if the rtx is known aligned. */
4220 aligned_operand (op, mode)
4222 enum machine_mode mode;
4224 struct ix86_address parts;
4226 if (!general_operand (op, mode))
4229 /* Registers and immediate operands are always "aligned". */
4230 if (GET_CODE (op) != MEM)
4233 /* Don't even try to do any aligned optimizations with volatiles. */
4234 if (MEM_VOLATILE_P (op))
4239 /* Pushes and pops are only valid on the stack pointer. */
4240 if (GET_CODE (op) == PRE_DEC
4241 || GET_CODE (op) == POST_INC)
4244 /* Decode the address. */
4245 if (! ix86_decompose_address (op, &parts))
4248 if (parts.base && GET_CODE (parts.base) == SUBREG)
4249 parts.base = SUBREG_REG (parts.base);
4250 if (parts.index && GET_CODE (parts.index) == SUBREG)
4251 parts.index = SUBREG_REG (parts.index);
4253 /* Look for some component that isn't known to be aligned. */
4257 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
4262 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
4267 if (GET_CODE (parts.disp) != CONST_INT
4268 || (INTVAL (parts.disp) & 3) != 0)
4272 /* Didn't find one -- this must be an aligned address. */
4276 /* Initialize the table of extra 80387 mathematical constants. */
4279 init_ext_80387_constants ()
4281 static const char * cst[5] =
4283 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4284 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4285 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4286 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4287 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4291 for (i = 0; i < 5; i++)
4293 real_from_string (&ext_80387_constants_table[i], cst[i]);
4294 /* Ensure each constant is rounded to XFmode precision. */
4295 real_convert (&ext_80387_constants_table[i], XFmode,
4296 &ext_80387_constants_table[i]);
4299 ext_80387_constants_init = 1;
4302 /* Return true if the constant is something that can be loaded with
4303 a special instruction. */
4306 standard_80387_constant_p (x)
4309 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4312 if (x == CONST0_RTX (GET_MODE (x)))
4314 if (x == CONST1_RTX (GET_MODE (x)))
4317 /* For XFmode constants, try to find a special 80387 instruction on
4318 those CPUs that benefit from them. */
4319 if (GET_MODE (x) == XFmode
4320 && x86_ext_80387_constants & TUNEMASK)
4325 if (! ext_80387_constants_init)
4326 init_ext_80387_constants ();
4328 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4329 for (i = 0; i < 5; i++)
4330 if (real_identical (&r, &ext_80387_constants_table[i]))
4337 /* Return the opcode of the special instruction to be used to load
4341 standard_80387_constant_opcode (x)
4344 switch (standard_80387_constant_p (x))
4364 /* Return the CONST_DOUBLE representing the 80387 constant that is
4365 loaded by the specified special instruction. The argument IDX
4366 matches the return value from standard_80387_constant_p. */
4369 standard_80387_constant_rtx (idx)
4374 if (! ext_80387_constants_init)
4375 init_ext_80387_constants ();
4391 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i], XFmode);
4394 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4397 standard_sse_constant_p (x)
4400 if (x == const0_rtx)
4402 return (x == CONST0_RTX (GET_MODE (x)));
4405 /* Returns 1 if OP contains a symbol reference */
4408 symbolic_reference_mentioned_p (op)
4411 register const char *fmt;
4414 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4417 fmt = GET_RTX_FORMAT (GET_CODE (op));
4418 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4424 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4425 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4429 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4436 /* Return 1 if it is appropriate to emit `ret' instructions in the
4437 body of a function. Do this only if the epilogue is simple, needing a
4438 couple of insns. Prior to reloading, we can't tell how many registers
4439 must be saved, so return 0 then. Return 0 if there is no frame
4440 marker to de-allocate.
4442 If NON_SAVING_SETJMP is defined and true, then it is not possible
4443 for the epilogue to be simple, so return 0. This is a special case
4444 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4445 until final, but jump_optimize may need to know sooner if a
4449 ix86_can_use_return_insn_p ()
4451 struct ix86_frame frame;
4453 #ifdef NON_SAVING_SETJMP
4454 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4458 if (! reload_completed || frame_pointer_needed)
4461 /* Don't allow more than 32 pop, since that's all we can do
4462 with one instruction. */
4463 if (current_function_pops_args
4464 && current_function_args_size >= 32768)
4467 ix86_compute_frame_layout (&frame);
4468 return frame.to_allocate == 0 && frame.nregs == 0;
4471 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
4473 x86_64_sign_extended_value (value)
4476 switch (GET_CODE (value))
4478 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
4479 to be at least 32 and this all acceptable constants are
4480 represented as CONST_INT. */
4482 if (HOST_BITS_PER_WIDE_INT == 32)
4486 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4487 return trunc_int_for_mode (val, SImode) == val;
4491 /* For certain code models, the symbolic references are known to fit.
4492 in CM_SMALL_PIC model we know it fits if it is local to the shared
4493 library. Don't count TLS SYMBOL_REFs here, since they should fit
4494 only if inside of UNSPEC handled below. */
4496 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4498 /* For certain code models, the code is near as well. */
4500 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4501 || ix86_cmodel == CM_KERNEL);
4503 /* We also may accept the offsetted memory references in certain special
4506 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4507 switch (XINT (XEXP (value, 0), 1))
4509 case UNSPEC_GOTPCREL:
4511 case UNSPEC_GOTNTPOFF:
4517 if (GET_CODE (XEXP (value, 0)) == PLUS)
4519 rtx op1 = XEXP (XEXP (value, 0), 0);
4520 rtx op2 = XEXP (XEXP (value, 0), 1);
4521 HOST_WIDE_INT offset;
4523 if (ix86_cmodel == CM_LARGE)
4525 if (GET_CODE (op2) != CONST_INT)
4527 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4528 switch (GET_CODE (op1))
4531 /* For CM_SMALL assume that latest object is 16MB before
4532 end of 31bits boundary. We may also accept pretty
4533 large negative constants knowing that all objects are
4534 in the positive half of address space. */
4535 if (ix86_cmodel == CM_SMALL
4536 && offset < 16*1024*1024
4537 && trunc_int_for_mode (offset, SImode) == offset)
4539 /* For CM_KERNEL we know that all object resist in the
4540 negative half of 32bits address space. We may not
4541 accept negative offsets, since they may be just off
4542 and we may accept pretty large positive ones. */
4543 if (ix86_cmodel == CM_KERNEL
4545 && trunc_int_for_mode (offset, SImode) == offset)
4549 /* These conditions are similar to SYMBOL_REF ones, just the
4550 constraints for code models differ. */
4551 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4552 && offset < 16*1024*1024
4553 && trunc_int_for_mode (offset, SImode) == offset)
4555 if (ix86_cmodel == CM_KERNEL
4557 && trunc_int_for_mode (offset, SImode) == offset)
4561 switch (XINT (op1, 1))
4566 && trunc_int_for_mode (offset, SImode) == offset)
4580 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
4582 x86_64_zero_extended_value (value)
4585 switch (GET_CODE (value))
4588 if (HOST_BITS_PER_WIDE_INT == 32)
4589 return (GET_MODE (value) == VOIDmode
4590 && !CONST_DOUBLE_HIGH (value));
4594 if (HOST_BITS_PER_WIDE_INT == 32)
4595 return INTVAL (value) >= 0;
4597 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4600 /* For certain code models, the symbolic references are known to fit. */
4602 return ix86_cmodel == CM_SMALL;
4604 /* For certain code models, the code is near as well. */
4606 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4608 /* We also may accept the offsetted memory references in certain special
4611 if (GET_CODE (XEXP (value, 0)) == PLUS)
4613 rtx op1 = XEXP (XEXP (value, 0), 0);
4614 rtx op2 = XEXP (XEXP (value, 0), 1);
4616 if (ix86_cmodel == CM_LARGE)
4618 switch (GET_CODE (op1))
4622 /* For small code model we may accept pretty large positive
4623 offsets, since one bit is available for free. Negative
4624 offsets are limited by the size of NULL pointer area
4625 specified by the ABI. */
4626 if (ix86_cmodel == CM_SMALL
4627 && GET_CODE (op2) == CONST_INT
4628 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4629 && (trunc_int_for_mode (INTVAL (op2), SImode)
4632 /* ??? For the kernel, we may accept adjustment of
4633 -0x10000000, since we know that it will just convert
4634 negative address space to positive, but perhaps this
4635 is not worthwhile. */
4638 /* These conditions are similar to SYMBOL_REF ones, just the
4639 constraints for code models differ. */
4640 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4641 && GET_CODE (op2) == CONST_INT
4642 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4643 && (trunc_int_for_mode (INTVAL (op2), SImode)
4657 /* Value should be nonzero if functions must have frame pointers.
4658 Zero means the frame pointer need not be set up (and parms may
4659 be accessed via the stack pointer) in functions that seem suitable. */
4662 ix86_frame_pointer_required ()
4664 /* If we accessed previous frames, then the generated code expects
4665 to be able to access the saved ebp value in our frame. */
4666 if (cfun->machine->accesses_prev_frame)
4669 /* Several x86 os'es need a frame pointer for other reasons,
4670 usually pertaining to setjmp. */
4671 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4674 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4675 the frame pointer by default. Turn it back on now if we've not
4676 got a leaf function. */
4677 if (TARGET_OMIT_LEAF_FRAME_POINTER
4678 && (!current_function_is_leaf))
4681 if (current_function_profile)
4687 /* Record that the current function accesses previous call frames. */
4690 ix86_setup_frame_addresses ()
4692 cfun->machine->accesses_prev_frame = 1;
4695 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4696 # define USE_HIDDEN_LINKONCE 1
4698 # define USE_HIDDEN_LINKONCE 0
4701 static int pic_labels_used;
4703 /* Fills in the label name that should be used for a pc thunk for
4704 the given register. */
4707 get_pc_thunk_name (name, regno)
4711 if (USE_HIDDEN_LINKONCE)
4712 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4714 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4718 /* This function generates code for -fpic that loads %ebx with
4719 the return address of the caller and then returns. */
4727 for (regno = 0; regno < 8; ++regno)
4731 if (! ((pic_labels_used >> regno) & 1))
4734 get_pc_thunk_name (name, regno);
4736 if (USE_HIDDEN_LINKONCE)
4740 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4742 TREE_PUBLIC (decl) = 1;
4743 TREE_STATIC (decl) = 1;
4744 DECL_ONE_ONLY (decl) = 1;
4746 (*targetm.asm_out.unique_section) (decl, 0);
4747 named_section (decl, NULL, 0);
4749 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4750 fputs ("\t.hidden\t", asm_out_file);
4751 assemble_name (asm_out_file, name);
4752 fputc ('\n', asm_out_file);
4753 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4758 ASM_OUTPUT_LABEL (asm_out_file, name);
4761 xops[0] = gen_rtx_REG (SImode, regno);
4762 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4763 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4764 output_asm_insn ("ret", xops);
4767 if (NEED_INDICATE_EXEC_STACK)
4768 file_end_indicate_exec_stack ();
4771 /* Emit code for the SET_GOT patterns. */
4774 output_set_got (dest)
4780 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4782 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4784 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4787 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4789 output_asm_insn ("call\t%a2", xops);
4792 /* Output the "canonical" label name ("Lxx$pb") here too. This
4793 is what will be referred to by the Mach-O PIC subsystem. */
4794 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4796 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4797 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4800 output_asm_insn ("pop{l}\t%0", xops);
4805 get_pc_thunk_name (name, REGNO (dest));
4806 pic_labels_used |= 1 << REGNO (dest);
4808 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4809 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4810 output_asm_insn ("call\t%X2", xops);
4813 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4814 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4815 else if (!TARGET_MACHO)
4816 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4821 /* Generate an "push" pattern for input ARG. */
4827 return gen_rtx_SET (VOIDmode,
4829 gen_rtx_PRE_DEC (Pmode,
4830 stack_pointer_rtx)),
4834 /* Return >= 0 if there is an unused call-clobbered register available
4835 for the entire function. */
4838 ix86_select_alt_pic_regnum ()
4840 if (current_function_is_leaf && !current_function_profile)
4843 for (i = 2; i >= 0; --i)
4844 if (!regs_ever_live[i])
4848 return INVALID_REGNUM;
4851 /* Return 1 if we need to save REGNO. */
4853 ix86_save_reg (regno, maybe_eh_return)
4855 int maybe_eh_return;
4857 if (pic_offset_table_rtx
4858 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4859 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4860 || current_function_profile
4861 || current_function_calls_eh_return
4862 || current_function_uses_const_pool))
4864 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4869 if (current_function_calls_eh_return && maybe_eh_return)
4874 unsigned test = EH_RETURN_DATA_REGNO (i);
4875 if (test == INVALID_REGNUM)
4882 return (regs_ever_live[regno]
4883 && !call_used_regs[regno]
4884 && !fixed_regs[regno]
4885 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4888 /* Return number of registers to be saved on the stack. */
4896 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4897 if (ix86_save_reg (regno, true))
4902 /* Return the offset between two registers, one to be eliminated, and the other
4903 its replacement, at the start of a routine. */
4906 ix86_initial_elimination_offset (from, to)
4910 struct ix86_frame frame;
4911 ix86_compute_frame_layout (&frame);
4913 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4914 return frame.hard_frame_pointer_offset;
4915 else if (from == FRAME_POINTER_REGNUM
4916 && to == HARD_FRAME_POINTER_REGNUM)
4917 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4920 if (to != STACK_POINTER_REGNUM)
4922 else if (from == ARG_POINTER_REGNUM)
4923 return frame.stack_pointer_offset;
4924 else if (from != FRAME_POINTER_REGNUM)
4927 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4931 /* Fill structure ix86_frame about frame of currently computed function. */
4934 ix86_compute_frame_layout (frame)
4935 struct ix86_frame *frame;
4937 HOST_WIDE_INT total_size;
4938 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4940 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4941 HOST_WIDE_INT size = get_frame_size ();
4943 frame->nregs = ix86_nsaved_regs ();
4946 /* During reload iteration the amount of registers saved can change.
4947 Recompute the value as needed. Do not recompute when amount of registers
4948 didn't change as reload does mutiple calls to the function and does not
4949 expect the decision to change within single iteration. */
4951 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4953 int count = frame->nregs;
4955 cfun->machine->use_fast_prologue_epilogue_nregs = count;
4956 /* The fast prologue uses move instead of push to save registers. This
4957 is significantly longer, but also executes faster as modern hardware
4958 can execute the moves in parallel, but can't do that for push/pop.
4960 Be careful about choosing what prologue to emit: When function takes
4961 many instructions to execute we may use slow version as well as in
4962 case function is known to be outside hot spot (this is known with
4963 feedback only). Weight the size of function by number of registers
4964 to save as it is cheap to use one or two push instructions but very
4965 slow to use many of them. */
4967 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4968 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4969 || (flag_branch_probabilities
4970 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4971 cfun->machine->use_fast_prologue_epilogue = false;
4973 cfun->machine->use_fast_prologue_epilogue
4974 = !expensive_function_p (count);
4976 if (TARGET_PROLOGUE_USING_MOVE
4977 && cfun->machine->use_fast_prologue_epilogue)
4978 frame->save_regs_using_mov = true;
4980 frame->save_regs_using_mov = false;
4983 /* Skip return address and saved base pointer. */
4984 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4986 frame->hard_frame_pointer_offset = offset;
4988 /* Do some sanity checking of stack_alignment_needed and
4989 preferred_alignment, since i386 port is the only using those features
4990 that may break easily. */
4992 if (size && !stack_alignment_needed)
4994 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4996 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4998 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5001 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5002 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5004 /* Register save area */
5005 offset += frame->nregs * UNITS_PER_WORD;
5008 if (ix86_save_varrargs_registers)
5010 offset += X86_64_VARARGS_SIZE;
5011 frame->va_arg_size = X86_64_VARARGS_SIZE;
5014 frame->va_arg_size = 0;
5016 /* Align start of frame for local function. */
5017 frame->padding1 = ((offset + stack_alignment_needed - 1)
5018 & -stack_alignment_needed) - offset;
5020 offset += frame->padding1;
5022 /* Frame pointer points here. */
5023 frame->frame_pointer_offset = offset;
5027 /* Add outgoing arguments area. Can be skipped if we eliminated
5028 all the function calls as dead code. */
5029 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
5031 offset += current_function_outgoing_args_size;
5032 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5035 frame->outgoing_arguments_size = 0;
5037 /* Align stack boundary. Only needed if we're calling another function
5039 if (!current_function_is_leaf || current_function_calls_alloca)
5040 frame->padding2 = ((offset + preferred_alignment - 1)
5041 & -preferred_alignment) - offset;
5043 frame->padding2 = 0;
5045 offset += frame->padding2;
5047 /* We've reached end of stack frame. */
5048 frame->stack_pointer_offset = offset;
5050 /* Size prologue needs to allocate. */
5051 frame->to_allocate =
5052 (size + frame->padding1 + frame->padding2
5053 + frame->outgoing_arguments_size + frame->va_arg_size);
5055 if (!frame->to_allocate && frame->nregs <= 1)
5056 frame->save_regs_using_mov = false;
5058 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5059 && current_function_is_leaf)
5061 frame->red_zone_size = frame->to_allocate;
5062 if (frame->save_regs_using_mov)
5063 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5064 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5065 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5068 frame->red_zone_size = 0;
5069 frame->to_allocate -= frame->red_zone_size;
5070 frame->stack_pointer_offset -= frame->red_zone_size;
5072 fprintf (stderr, "nregs: %i\n", frame->nregs);
5073 fprintf (stderr, "size: %i\n", size);
5074 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5075 fprintf (stderr, "padding1: %i\n", frame->padding1);
5076 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5077 fprintf (stderr, "padding2: %i\n", frame->padding2);
5078 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5079 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5080 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5081 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5082 frame->hard_frame_pointer_offset);
5083 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5087 /* Emit code to save registers in the prologue. */
5090 ix86_emit_save_regs ()
5095 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5096 if (ix86_save_reg (regno, true))
5098 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5099 RTX_FRAME_RELATED_P (insn) = 1;
5103 /* Emit code to save registers using MOV insns. First register
5104 is restored from POINTER + OFFSET. */
5106 ix86_emit_save_regs_using_mov (pointer, offset)
5108 HOST_WIDE_INT offset;
5113 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5114 if (ix86_save_reg (regno, true))
5116 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5118 gen_rtx_REG (Pmode, regno));
5119 RTX_FRAME_RELATED_P (insn) = 1;
5120 offset += UNITS_PER_WORD;
5124 /* Expand the prologue into a bunch of separate insns. */
5127 ix86_expand_prologue ()
5131 struct ix86_frame frame;
5132 HOST_WIDE_INT allocate;
5134 ix86_compute_frame_layout (&frame);
5136 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5137 slower on all targets. Also sdb doesn't like it. */
5139 if (frame_pointer_needed)
5141 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5142 RTX_FRAME_RELATED_P (insn) = 1;
5144 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5145 RTX_FRAME_RELATED_P (insn) = 1;
5148 allocate = frame.to_allocate;
5150 if (!frame.save_regs_using_mov)
5151 ix86_emit_save_regs ();
5153 allocate += frame.nregs * UNITS_PER_WORD;
5155 /* When using red zone we may start register saving before allocating
5156 the stack frame saving one cycle of the prologue. */
5157 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5158 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5159 : stack_pointer_rtx,
5160 -frame.nregs * UNITS_PER_WORD);
5164 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5166 insn = emit_insn (gen_pro_epilogue_adjust_stack
5167 (stack_pointer_rtx, stack_pointer_rtx,
5168 GEN_INT (-allocate)));
5169 RTX_FRAME_RELATED_P (insn) = 1;
5173 /* ??? Is this only valid for Win32? */
5180 arg0 = gen_rtx_REG (SImode, 0);
5181 emit_move_insn (arg0, GEN_INT (allocate));
5183 sym = gen_rtx_MEM (FUNCTION_MODE,
5184 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
5185 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
5187 CALL_INSN_FUNCTION_USAGE (insn)
5188 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
5189 CALL_INSN_FUNCTION_USAGE (insn));
5191 /* Don't allow scheduling pass to move insns across __alloca
5193 emit_insn (gen_blockage (const0_rtx));
5195 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5197 if (!frame_pointer_needed || !frame.to_allocate)
5198 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5200 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5201 -frame.nregs * UNITS_PER_WORD);
5204 pic_reg_used = false;
5205 if (pic_offset_table_rtx
5206 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5207 || current_function_profile))
5209 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5211 if (alt_pic_reg_used != INVALID_REGNUM)
5212 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5214 pic_reg_used = true;
5219 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5221 /* Even with accurate pre-reload life analysis, we can wind up
5222 deleting all references to the pic register after reload.
5223 Consider if cross-jumping unifies two sides of a branch
5224 controlled by a comparison vs the only read from a global.
5225 In which case, allow the set_got to be deleted, though we're
5226 too late to do anything about the ebx save in the prologue. */
5227 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5230 /* Prevent function calls from be scheduled before the call to mcount.
5231 In the pic_reg_used case, make sure that the got load isn't deleted. */
5232 if (current_function_profile)
5233 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5236 /* Emit code to restore saved registers using MOV insns. First register
5237 is restored from POINTER + OFFSET. */
5239 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
5242 int maybe_eh_return;
5246 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5247 if (ix86_save_reg (regno, maybe_eh_return))
5249 emit_move_insn (gen_rtx_REG (Pmode, regno),
5250 adjust_address (gen_rtx_MEM (Pmode, pointer),
5252 offset += UNITS_PER_WORD;
5256 /* Restore function stack, frame, and registers. */
5259 ix86_expand_epilogue (style)
5263 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5264 struct ix86_frame frame;
5265 HOST_WIDE_INT offset;
5267 ix86_compute_frame_layout (&frame);
5269 /* Calculate start of saved registers relative to ebp. Special care
5270 must be taken for the normal return case of a function using
5271 eh_return: the eax and edx registers are marked as saved, but not
5272 restored along this path. */
5273 offset = frame.nregs;
5274 if (current_function_calls_eh_return && style != 2)
5276 offset *= -UNITS_PER_WORD;
5278 /* If we're only restoring one register and sp is not valid then
5279 using a move instruction to restore the register since it's
5280 less work than reloading sp and popping the register.
5282 The default code result in stack adjustment using add/lea instruction,
5283 while this code results in LEAVE instruction (or discrete equivalent),
5284 so it is profitable in some other cases as well. Especially when there
5285 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5286 and there is exactly one register to pop. This heuristic may need some
5287 tuning in future. */
5288 if ((!sp_valid && frame.nregs <= 1)
5289 || (TARGET_EPILOGUE_USING_MOVE
5290 && cfun->machine->use_fast_prologue_epilogue
5291 && (frame.nregs > 1 || frame.to_allocate))
5292 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5293 || (frame_pointer_needed && TARGET_USE_LEAVE
5294 && cfun->machine->use_fast_prologue_epilogue
5295 && frame.nregs == 1)
5296 || current_function_calls_eh_return)
5298 /* Restore registers. We can use ebp or esp to address the memory
5299 locations. If both are available, default to ebp, since offsets
5300 are known to be small. Only exception is esp pointing directly to the
5301 end of block of saved registers, where we may simplify addressing
5304 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5305 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5306 frame.to_allocate, style == 2);
5308 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5309 offset, style == 2);
5311 /* eh_return epilogues need %ecx added to the stack pointer. */
5314 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5316 if (frame_pointer_needed)
5318 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5319 tmp = plus_constant (tmp, UNITS_PER_WORD);
5320 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5322 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5323 emit_move_insn (hard_frame_pointer_rtx, tmp);
5325 emit_insn (gen_pro_epilogue_adjust_stack
5326 (stack_pointer_rtx, sa, const0_rtx));
5330 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5331 tmp = plus_constant (tmp, (frame.to_allocate
5332 + frame.nregs * UNITS_PER_WORD));
5333 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5336 else if (!frame_pointer_needed)
5337 emit_insn (gen_pro_epilogue_adjust_stack
5338 (stack_pointer_rtx, stack_pointer_rtx,
5339 GEN_INT (frame.to_allocate
5340 + frame.nregs * UNITS_PER_WORD)));
5341 /* If not an i386, mov & pop is faster than "leave". */
5342 else if (TARGET_USE_LEAVE || optimize_size
5343 || !cfun->machine->use_fast_prologue_epilogue)
5344 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5347 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
5348 hard_frame_pointer_rtx,
5351 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5353 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5358 /* First step is to deallocate the stack frame so that we can
5359 pop the registers. */
5362 if (!frame_pointer_needed)
5364 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
5365 hard_frame_pointer_rtx,
5368 else if (frame.to_allocate)
5369 emit_insn (gen_pro_epilogue_adjust_stack
5370 (stack_pointer_rtx, stack_pointer_rtx,
5371 GEN_INT (frame.to_allocate)));
5373 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5374 if (ix86_save_reg (regno, false))
5377 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5379 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5381 if (frame_pointer_needed)
5383 /* Leave results in shorter dependency chains on CPUs that are
5384 able to grok it fast. */
5385 if (TARGET_USE_LEAVE)
5386 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5387 else if (TARGET_64BIT)
5388 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5390 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5394 /* Sibcall epilogues don't want a return instruction. */
5398 if (current_function_pops_args && current_function_args_size)
5400 rtx popc = GEN_INT (current_function_pops_args);
5402 /* i386 can only pop 64K bytes. If asked to pop more, pop
5403 return address, do explicit add, and jump indirectly to the
5406 if (current_function_pops_args >= 65536)
5408 rtx ecx = gen_rtx_REG (SImode, 2);
5410 /* There are is no "pascal" calling convention in 64bit ABI. */
5414 emit_insn (gen_popsi1 (ecx));
5415 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5416 emit_jump_insn (gen_return_indirect_internal (ecx));
5419 emit_jump_insn (gen_return_pop_internal (popc));
5422 emit_jump_insn (gen_return_internal ());
5425 /* Reset from the function's potential modifications. */
5428 ix86_output_function_epilogue (file, size)
5429 FILE *file ATTRIBUTE_UNUSED;
5430 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
5432 if (pic_offset_table_rtx)
5433 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5436 /* Extract the parts of an RTL expression that is a valid memory address
5437 for an instruction. Return 0 if the structure of the address is
5438 grossly off. Return -1 if the address contains ASHIFT, so it is not
5439 strictly valid, but still used for computing length of lea instruction. */
5442 ix86_decompose_address (addr, out)
5444 struct ix86_address *out;
5446 rtx base = NULL_RTX;
5447 rtx index = NULL_RTX;
5448 rtx disp = NULL_RTX;
5449 HOST_WIDE_INT scale = 1;
5450 rtx scale_rtx = NULL_RTX;
5452 enum ix86_address_seg seg = SEG_DEFAULT;
5454 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
5456 else if (GET_CODE (addr) == PLUS)
5466 addends[n++] = XEXP (op, 1);
5469 while (GET_CODE (op) == PLUS);
5474 for (i = n; i >= 0; --i)
5477 switch (GET_CODE (op))
5482 index = XEXP (op, 0);
5483 scale_rtx = XEXP (op, 1);
5487 if (XINT (op, 1) == UNSPEC_TP
5488 && TARGET_TLS_DIRECT_SEG_REFS
5489 && seg == SEG_DEFAULT)
5490 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5519 else if (GET_CODE (addr) == MULT)
5521 index = XEXP (addr, 0); /* index*scale */
5522 scale_rtx = XEXP (addr, 1);
5524 else if (GET_CODE (addr) == ASHIFT)
5528 /* We're called for lea too, which implements ashift on occasion. */
5529 index = XEXP (addr, 0);
5530 tmp = XEXP (addr, 1);
5531 if (GET_CODE (tmp) != CONST_INT)
5533 scale = INTVAL (tmp);
5534 if ((unsigned HOST_WIDE_INT) scale > 3)
5540 disp = addr; /* displacement */
5542 /* Extract the integral value of scale. */
5545 if (GET_CODE (scale_rtx) != CONST_INT)
5547 scale = INTVAL (scale_rtx);
5550 /* Allow arg pointer and stack pointer as index if there is not scaling. */
5551 if (base && index && scale == 1
5552 && (index == arg_pointer_rtx
5553 || index == frame_pointer_rtx
5554 || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
5561 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5562 if ((base == hard_frame_pointer_rtx
5563 || base == frame_pointer_rtx
5564 || base == arg_pointer_rtx) && !disp)
5567 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5568 Avoid this by transforming to [%esi+0]. */
5569 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5570 && base && !index && !disp
5572 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5575 /* Special case: encode reg+reg instead of reg*2. */
5576 if (!base && index && scale && scale == 2)
5577 base = index, scale = 1;
5579 /* Special case: scaling cannot be encoded without base or displacement. */
5580 if (!base && !disp && index && scale != 1)
5592 /* Return cost of the memory address x.
5593 For i386, it is better to use a complex address than let gcc copy
5594 the address into a reg and make a new pseudo. But not if the address
5595 requires to two regs - that would mean more pseudos with longer
5598 ix86_address_cost (x)
5601 struct ix86_address parts;
5604 if (!ix86_decompose_address (x, &parts))
5607 if (parts.base && GET_CODE (parts.base) == SUBREG)
5608 parts.base = SUBREG_REG (parts.base);
5609 if (parts.index && GET_CODE (parts.index) == SUBREG)
5610 parts.index = SUBREG_REG (parts.index);
5612 /* More complex memory references are better. */
5613 if (parts.disp && parts.disp != const0_rtx)
5615 if (parts.seg != SEG_DEFAULT)
5618 /* Attempt to minimize number of registers in the address. */
5620 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5622 && (!REG_P (parts.index)
5623 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5627 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5629 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5630 && parts.base != parts.index)
5633 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
5634 since it's predecode logic can't detect the length of instructions
5635 and it degenerates to vector decoded. Increase cost of such
5636 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5637 to split such addresses or even refuse such addresses at all.
5639 Following addressing modes are affected:
5644 The first and last case may be avoidable by explicitly coding the zero in
5645 memory address, but I don't have AMD-K6 machine handy to check this
5649 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5650 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5651 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5657 /* If X is a machine specific address (i.e. a symbol or label being
5658 referenced as a displacement from the GOT implemented using an
5659 UNSPEC), then return the base term. Otherwise return X. */
5662 ix86_find_base_term (x)
5669 if (GET_CODE (x) != CONST)
5672 if (GET_CODE (term) == PLUS
5673 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5674 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5675 term = XEXP (term, 0);
5676 if (GET_CODE (term) != UNSPEC
5677 || XINT (term, 1) != UNSPEC_GOTPCREL)
5680 term = XVECEXP (term, 0, 0);
5682 if (GET_CODE (term) != SYMBOL_REF
5683 && GET_CODE (term) != LABEL_REF)
5689 term = ix86_delegitimize_address (x);
5691 if (GET_CODE (term) != SYMBOL_REF
5692 && GET_CODE (term) != LABEL_REF)
5698 /* Determine if a given RTX is a valid constant. We already know this
5699 satisfies CONSTANT_P. */
5702 legitimate_constant_p (x)
5707 switch (GET_CODE (x))
5710 /* TLS symbols are not constant. */
5711 if (tls_symbolic_operand (x, Pmode))
5716 inner = XEXP (x, 0);
5718 /* Offsets of TLS symbols are never valid.
5719 Discourage CSE from creating them. */
5720 if (GET_CODE (inner) == PLUS
5721 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5724 if (GET_CODE (inner) == PLUS)
5726 if (GET_CODE (XEXP (inner, 1)) != CONST_INT)
5728 inner = XEXP (inner, 0);
5731 /* Only some unspecs are valid as "constants". */
5732 if (GET_CODE (inner) == UNSPEC)
5733 switch (XINT (inner, 1))
5737 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5739 return local_dynamic_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5749 /* Otherwise we handle everything else in the move patterns. */
5753 /* Determine if it's legal to put X into the constant pool. This
5754 is not possible for the address of thread-local symbols, which
5755 is checked above. */
5758 ix86_cannot_force_const_mem (x)
5761 return !legitimate_constant_p (x);
5764 /* Determine if a given RTX is a valid constant address. */
5767 constant_address_p (x)
5770 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5773 /* Nonzero if the constant value X is a legitimate general operand
5774 when generating PIC code. It is given that flag_pic is on and
5775 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5778 legitimate_pic_operand_p (x)
5783 switch (GET_CODE (x))
5786 inner = XEXP (x, 0);
5788 /* Only some unspecs are valid as "constants". */
5789 if (GET_CODE (inner) == UNSPEC)
5790 switch (XINT (inner, 1))
5793 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5801 return legitimate_pic_address_disp_p (x);
5808 /* Determine if a given CONST RTX is a valid memory displacement
5812 legitimate_pic_address_disp_p (disp)
5817 /* In 64bit mode we can allow direct addresses of symbols and labels
5818 when they are not dynamic symbols. */
5821 /* TLS references should always be enclosed in UNSPEC. */
5822 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5824 if (GET_CODE (disp) == SYMBOL_REF
5825 && ix86_cmodel == CM_SMALL_PIC
5826 && SYMBOL_REF_LOCAL_P (disp))
5828 if (GET_CODE (disp) == LABEL_REF)
5830 if (GET_CODE (disp) == CONST
5831 && GET_CODE (XEXP (disp, 0)) == PLUS
5832 && ((GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
5833 && ix86_cmodel == CM_SMALL_PIC
5834 && SYMBOL_REF_LOCAL_P (XEXP (XEXP (disp, 0), 0)))
5835 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
5836 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT
5837 && INTVAL (XEXP (XEXP (disp, 0), 1)) < 16*1024*1024
5838 && INTVAL (XEXP (XEXP (disp, 0), 1)) >= -16*1024*1024)
5841 if (GET_CODE (disp) != CONST)
5843 disp = XEXP (disp, 0);
5847 /* We are unsafe to allow PLUS expressions. This limit allowed distance
5848 of GOT tables. We should not need these anyway. */
5849 if (GET_CODE (disp) != UNSPEC
5850 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5853 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5854 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5860 if (GET_CODE (disp) == PLUS)
5862 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5864 disp = XEXP (disp, 0);
5868 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5869 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5871 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5872 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5873 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5875 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5876 if (strstr (sym_name, "$pb") != 0)
5881 if (GET_CODE (disp) != UNSPEC)
5884 switch (XINT (disp, 1))
5889 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5891 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
5892 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
5893 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5895 case UNSPEC_GOTTPOFF:
5896 case UNSPEC_GOTNTPOFF:
5897 case UNSPEC_INDNTPOFF:
5900 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5902 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5904 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5910 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5911 memory address for an instruction. The MODE argument is the machine mode
5912 for the MEM expression that wants to use this address.
5914 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
5915 convert common non-canonical forms to canonical form so that they will
5919 legitimate_address_p (mode, addr, strict)
5920 enum machine_mode mode;
5924 struct ix86_address parts;
5925 rtx base, index, disp;
5926 HOST_WIDE_INT scale;
5927 const char *reason = NULL;
5928 rtx reason_rtx = NULL_RTX;
5930 if (TARGET_DEBUG_ADDR)
5933 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5934 GET_MODE_NAME (mode), strict);
5938 if (ix86_decompose_address (addr, &parts) <= 0)
5940 reason = "decomposition failed";
5945 index = parts.index;
5947 scale = parts.scale;
5949 /* Validate base register.
5951 Don't allow SUBREG's here, it can lead to spill failures when the base
5952 is one word out of a two word structure, which is represented internally
5960 if (GET_CODE (base) == SUBREG)
5961 reg = SUBREG_REG (base);
5965 if (GET_CODE (reg) != REG)
5967 reason = "base is not a register";
5971 if (GET_MODE (base) != Pmode)
5973 reason = "base is not in Pmode";
5977 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5978 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5980 reason = "base is not valid";
5985 /* Validate index register.
5987 Don't allow SUBREG's here, it can lead to spill failures when the index
5988 is one word out of a two word structure, which is represented internally
5996 if (GET_CODE (index) == SUBREG)
5997 reg = SUBREG_REG (index);
6001 if (GET_CODE (reg) != REG)
6003 reason = "index is not a register";
6007 if (GET_MODE (index) != Pmode)
6009 reason = "index is not in Pmode";
6013 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6014 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6016 reason = "index is not valid";
6021 /* Validate scale factor. */
6024 reason_rtx = GEN_INT (scale);
6027 reason = "scale without index";
6031 if (scale != 2 && scale != 4 && scale != 8)
6033 reason = "scale is not a valid multiplier";
6038 /* Validate displacement. */
6043 if (GET_CODE (disp) == CONST
6044 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6045 switch (XINT (XEXP (disp, 0), 1))
6049 case UNSPEC_GOTPCREL:
6052 goto is_legitimate_pic;
6054 case UNSPEC_GOTTPOFF:
6055 case UNSPEC_GOTNTPOFF:
6056 case UNSPEC_INDNTPOFF:
6062 reason = "invalid address unspec";
6066 else if (flag_pic && (SYMBOLIC_CONST (disp)
6068 && !machopic_operand_p (disp)
6073 if (TARGET_64BIT && (index || base))
6075 /* foo@dtpoff(%rX) is ok. */
6076 if (GET_CODE (disp) != CONST
6077 || GET_CODE (XEXP (disp, 0)) != PLUS
6078 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6079 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6080 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6081 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6083 reason = "non-constant pic memory reference";
6087 else if (! legitimate_pic_address_disp_p (disp))
6089 reason = "displacement is an invalid pic construct";
6093 /* This code used to verify that a symbolic pic displacement
6094 includes the pic_offset_table_rtx register.
6096 While this is good idea, unfortunately these constructs may
6097 be created by "adds using lea" optimization for incorrect
6106 This code is nonsensical, but results in addressing
6107 GOT table with pic_offset_table_rtx base. We can't
6108 just refuse it easily, since it gets matched by
6109 "addsi3" pattern, that later gets split to lea in the
6110 case output register differs from input. While this
6111 can be handled by separate addsi pattern for this case
6112 that never results in lea, this seems to be easier and
6113 correct fix for crash to disable this test. */
6115 else if (GET_CODE (disp) != LABEL_REF
6116 && GET_CODE (disp) != CONST_INT
6117 && (GET_CODE (disp) != CONST
6118 || !legitimate_constant_p (disp))
6119 && (GET_CODE (disp) != SYMBOL_REF
6120 || !legitimate_constant_p (disp)))
6122 reason = "displacement is not constant";
6125 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
6127 reason = "displacement is out of range";
6132 /* Everything looks valid. */
6133 if (TARGET_DEBUG_ADDR)
6134 fprintf (stderr, "Success.\n");
6138 if (TARGET_DEBUG_ADDR)
6140 fprintf (stderr, "Error: %s\n", reason);
6141 debug_rtx (reason_rtx);
6146 /* Return an unique alias set for the GOT. */
6148 static HOST_WIDE_INT
6149 ix86_GOT_alias_set ()
6151 static HOST_WIDE_INT set = -1;
6153 set = new_alias_set ();
6157 /* Return a legitimate reference for ORIG (an address) using the
6158 register REG. If REG is 0, a new pseudo is generated.
6160 There are two types of references that must be handled:
6162 1. Global data references must load the address from the GOT, via
6163 the PIC reg. An insn is emitted to do this load, and the reg is
6166 2. Static data references, constant pool addresses, and code labels
6167 compute the address as an offset from the GOT, whose base is in
6168 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6169 differentiate them from global data objects. The returned
6170 address is the PIC reg + an unspec constant.
6172 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6173 reg also appears in the address. */
6176 legitimize_pic_address (orig, reg)
6186 reg = gen_reg_rtx (Pmode);
6187 /* Use the generic Mach-O PIC machinery. */
6188 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6191 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6193 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6195 /* This symbol may be referenced via a displacement from the PIC
6196 base address (@GOTOFF). */
6198 if (reload_in_progress)
6199 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6200 if (GET_CODE (addr) == CONST)
6201 addr = XEXP (addr, 0);
6202 if (GET_CODE (addr) == PLUS)
6204 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6205 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6208 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6209 new = gen_rtx_CONST (Pmode, new);
6210 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6214 emit_move_insn (reg, new);
6218 else if (GET_CODE (addr) == SYMBOL_REF)
6222 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6223 new = gen_rtx_CONST (Pmode, new);
6224 new = gen_rtx_MEM (Pmode, new);
6225 RTX_UNCHANGING_P (new) = 1;
6226 set_mem_alias_set (new, ix86_GOT_alias_set ());
6229 reg = gen_reg_rtx (Pmode);
6230 /* Use directly gen_movsi, otherwise the address is loaded
6231 into register for CSE. We don't want to CSE this addresses,
6232 instead we CSE addresses from the GOT table, so skip this. */
6233 emit_insn (gen_movsi (reg, new));
6238 /* This symbol must be referenced via a load from the
6239 Global Offset Table (@GOT). */
6241 if (reload_in_progress)
6242 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6243 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6244 new = gen_rtx_CONST (Pmode, new);
6245 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6246 new = gen_rtx_MEM (Pmode, new);
6247 RTX_UNCHANGING_P (new) = 1;
6248 set_mem_alias_set (new, ix86_GOT_alias_set ());
6251 reg = gen_reg_rtx (Pmode);
6252 emit_move_insn (reg, new);
6258 if (GET_CODE (addr) == CONST)
6260 addr = XEXP (addr, 0);
6262 /* We must match stuff we generate before. Assume the only
6263 unspecs that can get here are ours. Not that we could do
6264 anything with them anyway... */
6265 if (GET_CODE (addr) == UNSPEC
6266 || (GET_CODE (addr) == PLUS
6267 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6269 if (GET_CODE (addr) != PLUS)
6272 if (GET_CODE (addr) == PLUS)
6274 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6276 /* Check first to see if this is a constant offset from a @GOTOFF
6277 symbol reference. */
6278 if (local_symbolic_operand (op0, Pmode)
6279 && GET_CODE (op1) == CONST_INT)
6283 if (reload_in_progress)
6284 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6285 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6287 new = gen_rtx_PLUS (Pmode, new, op1);
6288 new = gen_rtx_CONST (Pmode, new);
6289 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6293 emit_move_insn (reg, new);
6299 if (INTVAL (op1) < -16*1024*1024
6300 || INTVAL (op1) >= 16*1024*1024)
6301 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
6306 base = legitimize_pic_address (XEXP (addr, 0), reg);
6307 new = legitimize_pic_address (XEXP (addr, 1),
6308 base == reg ? NULL_RTX : reg);
6310 if (GET_CODE (new) == CONST_INT)
6311 new = plus_constant (base, INTVAL (new));
6314 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6316 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6317 new = XEXP (new, 1);
6319 new = gen_rtx_PLUS (Pmode, base, new);
6327 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6330 get_thread_pointer (to_reg)
6335 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6339 reg = gen_reg_rtx (Pmode);
6340 insn = gen_rtx_SET (VOIDmode, reg, tp);
6341 insn = emit_insn (insn);
6346 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6347 false if we expect this to be used for a memory address and true if
6348 we expect to load the address into a register. */
6351 legitimize_tls_address (x, model, for_mov)
6353 enum tls_model model;
6356 rtx dest, base, off, pic;
6361 case TLS_MODEL_GLOBAL_DYNAMIC:
6362 dest = gen_reg_rtx (Pmode);
6365 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6368 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6369 insns = get_insns ();
6372 emit_libcall_block (insns, dest, rax, x);
6375 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6378 case TLS_MODEL_LOCAL_DYNAMIC:
6379 base = gen_reg_rtx (Pmode);
6382 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6385 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6386 insns = get_insns ();
6389 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6390 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6391 emit_libcall_block (insns, base, rax, note);
6394 emit_insn (gen_tls_local_dynamic_base_32 (base));
6396 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6397 off = gen_rtx_CONST (Pmode, off);
6399 return gen_rtx_PLUS (Pmode, base, off);
6401 case TLS_MODEL_INITIAL_EXEC:
6405 type = UNSPEC_GOTNTPOFF;
6409 if (reload_in_progress)
6410 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6411 pic = pic_offset_table_rtx;
6412 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6414 else if (!TARGET_GNU_TLS)
6416 pic = gen_reg_rtx (Pmode);
6417 emit_insn (gen_set_got (pic));
6418 type = UNSPEC_GOTTPOFF;
6423 type = UNSPEC_INDNTPOFF;
6426 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6427 off = gen_rtx_CONST (Pmode, off);
6429 off = gen_rtx_PLUS (Pmode, pic, off);
6430 off = gen_rtx_MEM (Pmode, off);
6431 RTX_UNCHANGING_P (off) = 1;
6432 set_mem_alias_set (off, ix86_GOT_alias_set ());
6434 if (TARGET_64BIT || TARGET_GNU_TLS)
6436 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6437 off = force_reg (Pmode, off);
6438 return gen_rtx_PLUS (Pmode, base, off);
6442 base = get_thread_pointer (true);
6443 dest = gen_reg_rtx (Pmode);
6444 emit_insn (gen_subsi3 (dest, base, off));
6448 case TLS_MODEL_LOCAL_EXEC:
6449 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6450 (TARGET_64BIT || TARGET_GNU_TLS)
6451 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6452 off = gen_rtx_CONST (Pmode, off);
6454 if (TARGET_64BIT || TARGET_GNU_TLS)
6456 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6457 return gen_rtx_PLUS (Pmode, base, off);
6461 base = get_thread_pointer (true);
6462 dest = gen_reg_rtx (Pmode);
6463 emit_insn (gen_subsi3 (dest, base, off));
6474 /* Try machine-dependent ways of modifying an illegitimate address
6475 to be legitimate. If we find one, return the new, valid address.
6476 This macro is used in only one place: `memory_address' in explow.c.
6478 OLDX is the address as it was before break_out_memory_refs was called.
6479 In some cases it is useful to look at this to decide what needs to be done.
6481 MODE and WIN are passed so that this macro can use
6482 GO_IF_LEGITIMATE_ADDRESS.
6484 It is always safe for this macro to do nothing. It exists to recognize
6485 opportunities to optimize the output.
6487 For the 80386, we handle X+REG by loading X into a register R and
6488 using R+REG. R will go in a general reg and indexing will be used.
6489 However, if REG is a broken-out memory address or multiplication,
6490 nothing needs to be done because REG can certainly go in a general reg.
6492 When -fpic is used, special handling is needed for symbolic references.
6493 See comments by legitimize_pic_address in i386.c for details. */
6496 legitimize_address (x, oldx, mode)
6498 register rtx oldx ATTRIBUTE_UNUSED;
6499 enum machine_mode mode;
6504 if (TARGET_DEBUG_ADDR)
6506 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6507 GET_MODE_NAME (mode));
6511 log = tls_symbolic_operand (x, mode);
6513 return legitimize_tls_address (x, log, false);
6515 if (flag_pic && SYMBOLIC_CONST (x))
6516 return legitimize_pic_address (x, 0);
6518 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6519 if (GET_CODE (x) == ASHIFT
6520 && GET_CODE (XEXP (x, 1)) == CONST_INT
6521 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6524 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6525 GEN_INT (1 << log));
6528 if (GET_CODE (x) == PLUS)
6530 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6532 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6533 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6534 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6537 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6538 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6539 GEN_INT (1 << log));
6542 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6543 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6544 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6547 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6548 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6549 GEN_INT (1 << log));
6552 /* Put multiply first if it isn't already. */
6553 if (GET_CODE (XEXP (x, 1)) == MULT)
6555 rtx tmp = XEXP (x, 0);
6556 XEXP (x, 0) = XEXP (x, 1);
6561 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6562 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6563 created by virtual register instantiation, register elimination, and
6564 similar optimizations. */
6565 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6568 x = gen_rtx_PLUS (Pmode,
6569 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6570 XEXP (XEXP (x, 1), 0)),
6571 XEXP (XEXP (x, 1), 1));
6575 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6576 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6577 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6578 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6579 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6580 && CONSTANT_P (XEXP (x, 1)))
6583 rtx other = NULL_RTX;
6585 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6587 constant = XEXP (x, 1);
6588 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6590 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6592 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6593 other = XEXP (x, 1);
6601 x = gen_rtx_PLUS (Pmode,
6602 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6603 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6604 plus_constant (other, INTVAL (constant)));
6608 if (changed && legitimate_address_p (mode, x, FALSE))
6611 if (GET_CODE (XEXP (x, 0)) == MULT)
6614 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6617 if (GET_CODE (XEXP (x, 1)) == MULT)
6620 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6624 && GET_CODE (XEXP (x, 1)) == REG
6625 && GET_CODE (XEXP (x, 0)) == REG)
6628 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6631 x = legitimize_pic_address (x, 0);
6634 if (changed && legitimate_address_p (mode, x, FALSE))
6637 if (GET_CODE (XEXP (x, 0)) == REG)
6639 register rtx temp = gen_reg_rtx (Pmode);
6640 register rtx val = force_operand (XEXP (x, 1), temp);
6642 emit_move_insn (temp, val);
6648 else if (GET_CODE (XEXP (x, 1)) == REG)
6650 register rtx temp = gen_reg_rtx (Pmode);
6651 register rtx val = force_operand (XEXP (x, 0), temp);
6653 emit_move_insn (temp, val);
6663 /* Print an integer constant expression in assembler syntax. Addition
6664 and subtraction are the only arithmetic that may appear in these
6665 expressions. FILE is the stdio stream to write to, X is the rtx, and
6666 CODE is the operand print code from the output string. */
6669 output_pic_addr_const (file, x, code)
6676 switch (GET_CODE (x))
6686 assemble_name (file, XSTR (x, 0));
6687 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
6688 fputs ("@PLT", file);
6695 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6696 assemble_name (asm_out_file, buf);
6700 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6704 /* This used to output parentheses around the expression,
6705 but that does not work on the 386 (either ATT or BSD assembler). */
6706 output_pic_addr_const (file, XEXP (x, 0), code);
6710 if (GET_MODE (x) == VOIDmode)
6712 /* We can use %d if the number is <32 bits and positive. */
6713 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6714 fprintf (file, "0x%lx%08lx",
6715 (unsigned long) CONST_DOUBLE_HIGH (x),
6716 (unsigned long) CONST_DOUBLE_LOW (x));
6718 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6721 /* We can't handle floating point constants;
6722 PRINT_OPERAND must handle them. */
6723 output_operand_lossage ("floating constant misused");
6727 /* Some assemblers need integer constants to appear first. */
6728 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6730 output_pic_addr_const (file, XEXP (x, 0), code);
6732 output_pic_addr_const (file, XEXP (x, 1), code);
6734 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6736 output_pic_addr_const (file, XEXP (x, 1), code);
6738 output_pic_addr_const (file, XEXP (x, 0), code);
6746 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6747 output_pic_addr_const (file, XEXP (x, 0), code);
6749 output_pic_addr_const (file, XEXP (x, 1), code);
6751 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6755 if (XVECLEN (x, 0) != 1)
6757 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6758 switch (XINT (x, 1))
6761 fputs ("@GOT", file);
6764 fputs ("@GOTOFF", file);
6766 case UNSPEC_GOTPCREL:
6767 fputs ("@GOTPCREL(%rip)", file);
6769 case UNSPEC_GOTTPOFF:
6770 /* FIXME: This might be @TPOFF in Sun ld too. */
6771 fputs ("@GOTTPOFF", file);
6774 fputs ("@TPOFF", file);
6778 fputs ("@TPOFF", file);
6780 fputs ("@NTPOFF", file);
6783 fputs ("@DTPOFF", file);
6785 case UNSPEC_GOTNTPOFF:
6787 fputs ("@GOTTPOFF(%rip)", file);
6789 fputs ("@GOTNTPOFF", file);
6791 case UNSPEC_INDNTPOFF:
6792 fputs ("@INDNTPOFF", file);
6795 output_operand_lossage ("invalid UNSPEC as operand");
6801 output_operand_lossage ("invalid expression as operand");
6805 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6806 We need to handle our special PIC relocations. */
6809 i386_dwarf_output_addr_const (file, x)
6814 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6818 fprintf (file, "%s", ASM_LONG);
6821 output_pic_addr_const (file, x, '\0');
6823 output_addr_const (file, x);
6827 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6828 We need to emit DTP-relative relocations. */
6831 i386_output_dwarf_dtprel (file, size, x)
6836 fputs (ASM_LONG, file);
6837 output_addr_const (file, x);
6838 fputs ("@DTPOFF", file);
6844 fputs (", 0", file);
6851 /* In the name of slightly smaller debug output, and to cater to
6852 general assembler losage, recognize PIC+GOTOFF and turn it back
6853 into a direct symbol reference. */
6856 ix86_delegitimize_address (orig_x)
6861 if (GET_CODE (x) == MEM)
6866 if (GET_CODE (x) != CONST
6867 || GET_CODE (XEXP (x, 0)) != UNSPEC
6868 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6869 || GET_CODE (orig_x) != MEM)
6871 return XVECEXP (XEXP (x, 0), 0, 0);
6874 if (GET_CODE (x) != PLUS
6875 || GET_CODE (XEXP (x, 1)) != CONST)
6878 if (GET_CODE (XEXP (x, 0)) == REG
6879 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6880 /* %ebx + GOT/GOTOFF */
6882 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6884 /* %ebx + %reg * scale + GOT/GOTOFF */
6886 if (GET_CODE (XEXP (y, 0)) == REG
6887 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6889 else if (GET_CODE (XEXP (y, 1)) == REG
6890 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6894 if (GET_CODE (y) != REG
6895 && GET_CODE (y) != MULT
6896 && GET_CODE (y) != ASHIFT)
6902 x = XEXP (XEXP (x, 1), 0);
6903 if (GET_CODE (x) == UNSPEC
6904 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6905 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6908 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6909 return XVECEXP (x, 0, 0);
6912 if (GET_CODE (x) == PLUS
6913 && GET_CODE (XEXP (x, 0)) == UNSPEC
6914 && GET_CODE (XEXP (x, 1)) == CONST_INT
6915 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6916 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6917 && GET_CODE (orig_x) != MEM)))
6919 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6921 return gen_rtx_PLUS (Pmode, y, x);
6929 put_condition_code (code, mode, reverse, fp, file)
6931 enum machine_mode mode;
6937 if (mode == CCFPmode || mode == CCFPUmode)
6939 enum rtx_code second_code, bypass_code;
6940 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6941 if (bypass_code != NIL || second_code != NIL)
6943 code = ix86_fp_compare_code_to_integer (code);
6947 code = reverse_condition (code);
6958 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6963 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
6964 Those same assemblers have the same but opposite losage on cmov. */
6967 suffix = fp ? "nbe" : "a";
6970 if (mode == CCNOmode || mode == CCGOCmode)
6972 else if (mode == CCmode || mode == CCGCmode)
6983 if (mode == CCNOmode || mode == CCGOCmode)
6985 else if (mode == CCmode || mode == CCGCmode)
6994 suffix = fp ? "nb" : "ae";
6997 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
7007 suffix = fp ? "u" : "p";
7010 suffix = fp ? "nu" : "np";
7015 fputs (suffix, file);
7019 print_reg (x, code, file)
7024 if (REGNO (x) == ARG_POINTER_REGNUM
7025 || REGNO (x) == FRAME_POINTER_REGNUM
7026 || REGNO (x) == FLAGS_REG
7027 || REGNO (x) == FPSR_REG)
7030 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7033 if (code == 'w' || MMX_REG_P (x))
7035 else if (code == 'b')
7037 else if (code == 'k')
7039 else if (code == 'q')
7041 else if (code == 'y')
7043 else if (code == 'h')
7046 code = GET_MODE_SIZE (GET_MODE (x));
7048 /* Irritatingly, AMD extended registers use different naming convention
7049 from the normal registers. */
7050 if (REX_INT_REG_P (x))
7057 error ("extended registers have no high halves");
7060 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7063 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7066 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7069 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7072 error ("unsupported operand size for extended register");
7080 if (STACK_TOP_P (x))
7082 fputs ("st(0)", file);
7089 if (! ANY_FP_REG_P (x))
7090 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7094 fputs (hi_reg_name[REGNO (x)], file);
7097 fputs (qi_reg_name[REGNO (x)], file);
7100 fputs (qi_high_reg_name[REGNO (x)], file);
7107 /* Locate some local-dynamic symbol still in use by this function
7108 so that we can print its name in some tls_local_dynamic_base
7112 get_some_local_dynamic_name ()
7116 if (cfun->machine->some_ld_name)
7117 return cfun->machine->some_ld_name;
7119 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7121 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7122 return cfun->machine->some_ld_name;
7128 get_some_local_dynamic_name_1 (px, data)
7130 void *data ATTRIBUTE_UNUSED;
7134 if (GET_CODE (x) == SYMBOL_REF
7135 && local_dynamic_symbolic_operand (x, Pmode))
7137 cfun->machine->some_ld_name = XSTR (x, 0);
7145 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7146 C -- print opcode suffix for set/cmov insn.
7147 c -- like C, but print reversed condition
7148 F,f -- likewise, but for floating-point.
7149 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
7151 R -- print the prefix for register names.
7152 z -- print the opcode suffix for the size of the current operand.
7153 * -- print a star (in certain assembler syntax)
7154 A -- print an absolute memory reference.
7155 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7156 s -- print a shift double count, followed by the assemblers argument
7158 b -- print the QImode name of the register for the indicated operand.
7159 %b0 would print %al if operands[0] is reg 0.
7160 w -- likewise, print the HImode name of the register.
7161 k -- likewise, print the SImode name of the register.
7162 q -- likewise, print the DImode name of the register.
7163 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7164 y -- print "st(0)" instead of "st" as a register.
7165 D -- print condition for SSE cmp instruction.
7166 P -- if PIC, print an @PLT suffix.
7167 X -- don't print any sort of PIC '@' suffix for a symbol.
7168 & -- print some in-use local-dynamic symbol name.
7172 print_operand (file, x, code)
7182 if (ASSEMBLER_DIALECT == ASM_ATT)
7187 assemble_name (file, get_some_local_dynamic_name ());
7191 if (ASSEMBLER_DIALECT == ASM_ATT)
7193 else if (ASSEMBLER_DIALECT == ASM_INTEL)
7195 /* Intel syntax. For absolute addresses, registers should not
7196 be surrounded by braces. */
7197 if (GET_CODE (x) != REG)
7200 PRINT_OPERAND (file, x, 0);
7208 PRINT_OPERAND (file, x, 0);
7213 if (ASSEMBLER_DIALECT == ASM_ATT)
7218 if (ASSEMBLER_DIALECT == ASM_ATT)
7223 if (ASSEMBLER_DIALECT == ASM_ATT)
7228 if (ASSEMBLER_DIALECT == ASM_ATT)
7233 if (ASSEMBLER_DIALECT == ASM_ATT)
7238 if (ASSEMBLER_DIALECT == ASM_ATT)
7243 /* 387 opcodes don't get size suffixes if the operands are
7245 if (STACK_REG_P (x))
7248 /* Likewise if using Intel opcodes. */
7249 if (ASSEMBLER_DIALECT == ASM_INTEL)
7252 /* This is the size of op from size of operand. */
7253 switch (GET_MODE_SIZE (GET_MODE (x)))
7256 #ifdef HAVE_GAS_FILDS_FISTS
7262 if (GET_MODE (x) == SFmode)
7277 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7279 #ifdef GAS_MNEMONICS
7305 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7307 PRINT_OPERAND (file, x, 0);
7313 /* Little bit of braindamage here. The SSE compare instructions
7314 does use completely different names for the comparisons that the
7315 fp conditional moves. */
7316 switch (GET_CODE (x))
7331 fputs ("unord", file);
7335 fputs ("neq", file);
7339 fputs ("nlt", file);
7343 fputs ("nle", file);
7346 fputs ("ord", file);
7354 #ifdef CMOV_SUN_AS_SYNTAX
7355 if (ASSEMBLER_DIALECT == ASM_ATT)
7357 switch (GET_MODE (x))
7359 case HImode: putc ('w', file); break;
7361 case SFmode: putc ('l', file); break;
7363 case DFmode: putc ('q', file); break;
7371 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7374 #ifdef CMOV_SUN_AS_SYNTAX
7375 if (ASSEMBLER_DIALECT == ASM_ATT)
7378 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7381 /* Like above, but reverse condition */
7383 /* Check to see if argument to %c is really a constant
7384 and not a condition code which needs to be reversed. */
7385 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
7387 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7390 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7393 #ifdef CMOV_SUN_AS_SYNTAX
7394 if (ASSEMBLER_DIALECT == ASM_ATT)
7397 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7403 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7406 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7409 int pred_val = INTVAL (XEXP (x, 0));
7411 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7412 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7414 int taken = pred_val > REG_BR_PROB_BASE / 2;
7415 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7417 /* Emit hints only in the case default branch prediction
7418 heuristics would fail. */
7419 if (taken != cputaken)
7421 /* We use 3e (DS) prefix for taken branches and
7422 2e (CS) prefix for not taken branches. */
7424 fputs ("ds ; ", file);
7426 fputs ("cs ; ", file);
7433 output_operand_lossage ("invalid operand code `%c'", code);
7437 if (GET_CODE (x) == REG)
7439 PRINT_REG (x, code, file);
7442 else if (GET_CODE (x) == MEM)
7444 /* No `byte ptr' prefix for call instructions. */
7445 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7448 switch (GET_MODE_SIZE (GET_MODE (x)))
7450 case 1: size = "BYTE"; break;
7451 case 2: size = "WORD"; break;
7452 case 4: size = "DWORD"; break;
7453 case 8: size = "QWORD"; break;
7454 case 12: size = "XWORD"; break;
7455 case 16: size = "XMMWORD"; break;
7460 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7463 else if (code == 'w')
7465 else if (code == 'k')
7469 fputs (" PTR ", file);
7473 /* Avoid (%rip) for call operands. */
7474 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7475 && GET_CODE (x) != CONST_INT)
7476 output_addr_const (file, x);
7477 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7478 output_operand_lossage ("invalid constraints for operand");
7483 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7488 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7489 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7491 if (ASSEMBLER_DIALECT == ASM_ATT)
7493 fprintf (file, "0x%lx", l);
7496 /* These float cases don't actually occur as immediate operands. */
7497 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7501 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7502 fprintf (file, "%s", dstr);
7505 else if (GET_CODE (x) == CONST_DOUBLE
7506 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
7510 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7511 fprintf (file, "%s", dstr);
7518 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7520 if (ASSEMBLER_DIALECT == ASM_ATT)
7523 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7524 || GET_CODE (x) == LABEL_REF)
7526 if (ASSEMBLER_DIALECT == ASM_ATT)
7529 fputs ("OFFSET FLAT:", file);
7532 if (GET_CODE (x) == CONST_INT)
7533 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7535 output_pic_addr_const (file, x, code);
7537 output_addr_const (file, x);
7541 /* Print a memory operand whose address is ADDR. */
7544 print_operand_address (file, addr)
7548 struct ix86_address parts;
7549 rtx base, index, disp;
7552 if (! ix86_decompose_address (addr, &parts))
7556 index = parts.index;
7558 scale = parts.scale;
7566 if (USER_LABEL_PREFIX[0] == 0)
7568 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
7574 if (!base && !index)
7576 /* Displacement only requires special attention. */
7578 if (GET_CODE (disp) == CONST_INT)
7580 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
7582 if (USER_LABEL_PREFIX[0] == 0)
7584 fputs ("ds:", file);
7586 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
7589 output_pic_addr_const (file, disp, 0);
7591 output_addr_const (file, disp);
7593 /* Use one byte shorter RIP relative addressing for 64bit mode. */
7595 && ((GET_CODE (disp) == SYMBOL_REF
7596 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
7597 || GET_CODE (disp) == LABEL_REF
7598 || (GET_CODE (disp) == CONST
7599 && GET_CODE (XEXP (disp, 0)) == PLUS
7600 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
7601 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
7602 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
7603 fputs ("(%rip)", file);
7607 if (ASSEMBLER_DIALECT == ASM_ATT)
7612 output_pic_addr_const (file, disp, 0);
7613 else if (GET_CODE (disp) == LABEL_REF)
7614 output_asm_label (disp);
7616 output_addr_const (file, disp);
7621 PRINT_REG (base, 0, file);
7625 PRINT_REG (index, 0, file);
7627 fprintf (file, ",%d", scale);
7633 rtx offset = NULL_RTX;
7637 /* Pull out the offset of a symbol; print any symbol itself. */
7638 if (GET_CODE (disp) == CONST
7639 && GET_CODE (XEXP (disp, 0)) == PLUS
7640 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7642 offset = XEXP (XEXP (disp, 0), 1);
7643 disp = gen_rtx_CONST (VOIDmode,
7644 XEXP (XEXP (disp, 0), 0));
7648 output_pic_addr_const (file, disp, 0);
7649 else if (GET_CODE (disp) == LABEL_REF)
7650 output_asm_label (disp);
7651 else if (GET_CODE (disp) == CONST_INT)
7654 output_addr_const (file, disp);
7660 PRINT_REG (base, 0, file);
7663 if (INTVAL (offset) >= 0)
7665 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7669 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7676 PRINT_REG (index, 0, file);
7678 fprintf (file, "*%d", scale);
7686 output_addr_const_extra (file, x)
7692 if (GET_CODE (x) != UNSPEC)
7695 op = XVECEXP (x, 0, 0);
7696 switch (XINT (x, 1))
7698 case UNSPEC_GOTTPOFF:
7699 output_addr_const (file, op);
7700 /* FIXME: This might be @TPOFF in Sun ld. */
7701 fputs ("@GOTTPOFF", file);
7704 output_addr_const (file, op);
7705 fputs ("@TPOFF", file);
7708 output_addr_const (file, op);
7710 fputs ("@TPOFF", file);
7712 fputs ("@NTPOFF", file);
7715 output_addr_const (file, op);
7716 fputs ("@DTPOFF", file);
7718 case UNSPEC_GOTNTPOFF:
7719 output_addr_const (file, op);
7721 fputs ("@GOTTPOFF(%rip)", file);
7723 fputs ("@GOTNTPOFF", file);
7725 case UNSPEC_INDNTPOFF:
7726 output_addr_const (file, op);
7727 fputs ("@INDNTPOFF", file);
7737 /* Split one or more DImode RTL references into pairs of SImode
7738 references. The RTL can be REG, offsettable MEM, integer constant, or
7739 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7740 split and "num" is its length. lo_half and hi_half are output arrays
7741 that parallel "operands". */
7744 split_di (operands, num, lo_half, hi_half)
7747 rtx lo_half[], hi_half[];
7751 rtx op = operands[num];
7753 /* simplify_subreg refuse to split volatile memory addresses,
7754 but we still have to handle it. */
7755 if (GET_CODE (op) == MEM)
7757 lo_half[num] = adjust_address (op, SImode, 0);
7758 hi_half[num] = adjust_address (op, SImode, 4);
7762 lo_half[num] = simplify_gen_subreg (SImode, op,
7763 GET_MODE (op) == VOIDmode
7764 ? DImode : GET_MODE (op), 0);
7765 hi_half[num] = simplify_gen_subreg (SImode, op,
7766 GET_MODE (op) == VOIDmode
7767 ? DImode : GET_MODE (op), 4);
7771 /* Split one or more TImode RTL references into pairs of SImode
7772 references. The RTL can be REG, offsettable MEM, integer constant, or
7773 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7774 split and "num" is its length. lo_half and hi_half are output arrays
7775 that parallel "operands". */
7778 split_ti (operands, num, lo_half, hi_half)
7781 rtx lo_half[], hi_half[];
7785 rtx op = operands[num];
7787 /* simplify_subreg refuse to split volatile memory addresses, but we
7788 still have to handle it. */
7789 if (GET_CODE (op) == MEM)
7791 lo_half[num] = adjust_address (op, DImode, 0);
7792 hi_half[num] = adjust_address (op, DImode, 8);
7796 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7797 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7802 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7803 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7804 is the expression of the binary operation. The output may either be
7805 emitted here, or returned to the caller, like all output_* functions.
7807 There is no guarantee that the operands are the same mode, as they
7808 might be within FLOAT or FLOAT_EXTEND expressions. */
7810 #ifndef SYSV386_COMPAT
7811 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7812 wants to fix the assemblers because that causes incompatibility
7813 with gcc. No-one wants to fix gcc because that causes
7814 incompatibility with assemblers... You can use the option of
7815 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7816 #define SYSV386_COMPAT 1
7820 output_387_binary_op (insn, operands)
7824 static char buf[30];
7827 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7829 #ifdef ENABLE_CHECKING
7830 /* Even if we do not want to check the inputs, this documents input
7831 constraints. Which helps in understanding the following code. */
7832 if (STACK_REG_P (operands[0])
7833 && ((REG_P (operands[1])
7834 && REGNO (operands[0]) == REGNO (operands[1])
7835 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7836 || (REG_P (operands[2])
7837 && REGNO (operands[0]) == REGNO (operands[2])
7838 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7839 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7845 switch (GET_CODE (operands[3]))
7848 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7849 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7857 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7858 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7866 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7867 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7875 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7876 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7890 if (GET_MODE (operands[0]) == SFmode)
7891 strcat (buf, "ss\t{%2, %0|%0, %2}");
7893 strcat (buf, "sd\t{%2, %0|%0, %2}");
7898 switch (GET_CODE (operands[3]))
7902 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7904 rtx temp = operands[2];
7905 operands[2] = operands[1];
7909 /* know operands[0] == operands[1]. */
7911 if (GET_CODE (operands[2]) == MEM)
7917 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7919 if (STACK_TOP_P (operands[0]))
7920 /* How is it that we are storing to a dead operand[2]?
7921 Well, presumably operands[1] is dead too. We can't
7922 store the result to st(0) as st(0) gets popped on this
7923 instruction. Instead store to operands[2] (which I
7924 think has to be st(1)). st(1) will be popped later.
7925 gcc <= 2.8.1 didn't have this check and generated
7926 assembly code that the Unixware assembler rejected. */
7927 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7929 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7933 if (STACK_TOP_P (operands[0]))
7934 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7936 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7941 if (GET_CODE (operands[1]) == MEM)
7947 if (GET_CODE (operands[2]) == MEM)
7953 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7956 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7957 derived assemblers, confusingly reverse the direction of
7958 the operation for fsub{r} and fdiv{r} when the
7959 destination register is not st(0). The Intel assembler
7960 doesn't have this brain damage. Read !SYSV386_COMPAT to
7961 figure out what the hardware really does. */
7962 if (STACK_TOP_P (operands[0]))
7963 p = "{p\t%0, %2|rp\t%2, %0}";
7965 p = "{rp\t%2, %0|p\t%0, %2}";
7967 if (STACK_TOP_P (operands[0]))
7968 /* As above for fmul/fadd, we can't store to st(0). */
7969 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7971 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7976 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7979 if (STACK_TOP_P (operands[0]))
7980 p = "{rp\t%0, %1|p\t%1, %0}";
7982 p = "{p\t%1, %0|rp\t%0, %1}";
7984 if (STACK_TOP_P (operands[0]))
7985 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7987 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7992 if (STACK_TOP_P (operands[0]))
7994 if (STACK_TOP_P (operands[1]))
7995 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7997 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8000 else if (STACK_TOP_P (operands[1]))
8003 p = "{\t%1, %0|r\t%0, %1}";
8005 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8011 p = "{r\t%2, %0|\t%0, %2}";
8013 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8026 /* Output code to initialize control word copies used by
8027 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
8028 is set to control word rounding downwards. */
8030 emit_i387_cw_initialization (normal, round_down)
8031 rtx normal, round_down;
8033 rtx reg = gen_reg_rtx (HImode);
8035 emit_insn (gen_x86_fnstcw_1 (normal));
8036 emit_move_insn (reg, normal);
8037 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
8039 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8041 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
8042 emit_move_insn (round_down, reg);
8045 /* Output code for INSN to convert a float to a signed int. OPERANDS
8046 are the insn operands. The output may be [HSD]Imode and the input
8047 operand may be [SDX]Fmode. */
8050 output_fix_trunc (insn, operands)
8054 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8055 int dimode_p = GET_MODE (operands[0]) == DImode;
8057 /* Jump through a hoop or two for DImode, since the hardware has no
8058 non-popping instruction. We used to do this a different way, but
8059 that was somewhat fragile and broke with post-reload splitters. */
8060 if (dimode_p && !stack_top_dies)
8061 output_asm_insn ("fld\t%y1", operands);
8063 if (!STACK_TOP_P (operands[1]))
8066 if (GET_CODE (operands[0]) != MEM)
8069 output_asm_insn ("fldcw\t%3", operands);
8070 if (stack_top_dies || dimode_p)
8071 output_asm_insn ("fistp%z0\t%0", operands);
8073 output_asm_insn ("fist%z0\t%0", operands);
8074 output_asm_insn ("fldcw\t%2", operands);
8079 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8080 should be used and 2 when fnstsw should be used. UNORDERED_P is true
8081 when fucom should be used. */
8084 output_fp_compare (insn, operands, eflags_p, unordered_p)
8087 int eflags_p, unordered_p;
8090 rtx cmp_op0 = operands[0];
8091 rtx cmp_op1 = operands[1];
8092 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
8097 cmp_op1 = operands[2];
8101 if (GET_MODE (operands[0]) == SFmode)
8103 return "ucomiss\t{%1, %0|%0, %1}";
8105 return "comiss\t{%1, %0|%0, %1}";
8108 return "ucomisd\t{%1, %0|%0, %1}";
8110 return "comisd\t{%1, %0|%0, %1}";
8113 if (! STACK_TOP_P (cmp_op0))
8116 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8118 if (STACK_REG_P (cmp_op1)
8120 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8121 && REGNO (cmp_op1) != FIRST_STACK_REG)
8123 /* If both the top of the 387 stack dies, and the other operand
8124 is also a stack register that dies, then this must be a
8125 `fcompp' float compare */
8129 /* There is no double popping fcomi variant. Fortunately,
8130 eflags is immune from the fstp's cc clobbering. */
8132 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8134 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8142 return "fucompp\n\tfnstsw\t%0";
8144 return "fcompp\n\tfnstsw\t%0";
8157 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8159 static const char * const alt[24] =
8171 "fcomi\t{%y1, %0|%0, %y1}",
8172 "fcomip\t{%y1, %0|%0, %y1}",
8173 "fucomi\t{%y1, %0|%0, %y1}",
8174 "fucomip\t{%y1, %0|%0, %y1}",
8181 "fcom%z2\t%y2\n\tfnstsw\t%0",
8182 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8183 "fucom%z2\t%y2\n\tfnstsw\t%0",
8184 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8186 "ficom%z2\t%y2\n\tfnstsw\t%0",
8187 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8195 mask = eflags_p << 3;
8196 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
8197 mask |= unordered_p << 1;
8198 mask |= stack_top_dies;
8211 ix86_output_addr_vec_elt (file, value)
8215 const char *directive = ASM_LONG;
8220 directive = ASM_QUAD;
8226 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8230 ix86_output_addr_diff_elt (file, value, rel)
8235 fprintf (file, "%s%s%d-%s%d\n",
8236 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8237 else if (HAVE_AS_GOTOFF_IN_DATA)
8238 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8240 else if (TARGET_MACHO)
8241 fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
8242 machopic_function_base_name () + 1);
8245 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8246 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8249 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8253 ix86_expand_clear (dest)
8258 /* We play register width games, which are only valid after reload. */
8259 if (!reload_completed)
8262 /* Avoid HImode and its attendant prefix byte. */
8263 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8264 dest = gen_rtx_REG (SImode, REGNO (dest));
8266 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8268 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8269 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8271 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8272 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8278 /* X is an unchanging MEM. If it is a constant pool reference, return
8279 the constant pool rtx, else NULL. */
8282 maybe_get_pool_constant (x)
8285 x = ix86_delegitimize_address (XEXP (x, 0));
8287 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8288 return get_pool_constant (x);
8294 ix86_expand_move (mode, operands)
8295 enum machine_mode mode;
8298 int strict = (reload_in_progress || reload_completed);
8300 enum tls_model model;
8305 model = tls_symbolic_operand (op1, Pmode);
8308 op1 = legitimize_tls_address (op1, model, true);
8309 op1 = force_operand (op1, op0);
8314 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8319 rtx temp = ((reload_in_progress
8320 || ((op0 && GET_CODE (op0) == REG)
8322 ? op0 : gen_reg_rtx (Pmode));
8323 op1 = machopic_indirect_data_reference (op1, temp);
8324 op1 = machopic_legitimize_pic_address (op1, mode,
8325 temp == op1 ? 0 : temp);
8327 else if (MACHOPIC_INDIRECT)
8328 op1 = machopic_indirect_data_reference (op1, 0);
8332 if (GET_CODE (op0) == MEM)
8333 op1 = force_reg (Pmode, op1);
8337 if (GET_CODE (temp) != REG)
8338 temp = gen_reg_rtx (Pmode);
8339 temp = legitimize_pic_address (op1, temp);
8344 #endif /* TARGET_MACHO */
8348 if (GET_CODE (op0) == MEM
8349 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8350 || !push_operand (op0, mode))
8351 && GET_CODE (op1) == MEM)
8352 op1 = force_reg (mode, op1);
8354 if (push_operand (op0, mode)
8355 && ! general_no_elim_operand (op1, mode))
8356 op1 = copy_to_mode_reg (mode, op1);
8358 /* Force large constants in 64bit compilation into register
8359 to get them CSEed. */
8360 if (TARGET_64BIT && mode == DImode
8361 && immediate_operand (op1, mode)
8362 && !x86_64_zero_extended_value (op1)
8363 && !register_operand (op0, mode)
8364 && optimize && !reload_completed && !reload_in_progress)
8365 op1 = copy_to_mode_reg (mode, op1);
8367 if (FLOAT_MODE_P (mode))
8369 /* If we are loading a floating point constant to a register,
8370 force the value to memory now, since we'll get better code
8371 out the back end. */
8375 else if (GET_CODE (op1) == CONST_DOUBLE)
8377 op1 = validize_mem (force_const_mem (mode, op1));
8378 if (!register_operand (op0, mode))
8380 rtx temp = gen_reg_rtx (mode);
8381 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8382 emit_move_insn (op0, temp);
8389 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
8393 ix86_expand_vector_move (mode, operands)
8394 enum machine_mode mode;
8397 /* Force constants other than zero into memory. We do not know how
8398 the instructions used to build constants modify the upper 64 bits
8399 of the register, once we have that information we may be able
8400 to handle some of them more efficiently. */
8401 if ((reload_in_progress | reload_completed) == 0
8402 && register_operand (operands[0], mode)
8403 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
8404 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
8406 /* Make operand1 a register if it isn't already. */
8408 && !register_operand (operands[0], mode)
8409 && !register_operand (operands[1], mode))
8411 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8412 emit_move_insn (operands[0], temp);
8416 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8419 /* Attempt to expand a binary operator. Make the expansion closer to the
8420 actual machine, then just general_operand, which will allow 3 separate
8421 memory references (one output, two input) in a single insn. */
8424 ix86_expand_binary_operator (code, mode, operands)
8426 enum machine_mode mode;
8429 int matching_memory;
8430 rtx src1, src2, dst, op, clob;
8436 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8437 if (GET_RTX_CLASS (code) == 'c'
8438 && (rtx_equal_p (dst, src2)
8439 || immediate_operand (src1, mode)))
8446 /* If the destination is memory, and we do not have matching source
8447 operands, do things in registers. */
8448 matching_memory = 0;
8449 if (GET_CODE (dst) == MEM)
8451 if (rtx_equal_p (dst, src1))
8452 matching_memory = 1;
8453 else if (GET_RTX_CLASS (code) == 'c'
8454 && rtx_equal_p (dst, src2))
8455 matching_memory = 2;
8457 dst = gen_reg_rtx (mode);
8460 /* Both source operands cannot be in memory. */
8461 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8463 if (matching_memory != 2)
8464 src2 = force_reg (mode, src2);
8466 src1 = force_reg (mode, src1);
8469 /* If the operation is not commutable, source 1 cannot be a constant
8470 or non-matching memory. */
8471 if ((CONSTANT_P (src1)
8472 || (!matching_memory && GET_CODE (src1) == MEM))
8473 && GET_RTX_CLASS (code) != 'c')
8474 src1 = force_reg (mode, src1);
8476 /* If optimizing, copy to regs to improve CSE */
8477 if (optimize && ! no_new_pseudos)
8479 if (GET_CODE (dst) == MEM)
8480 dst = gen_reg_rtx (mode);
8481 if (GET_CODE (src1) == MEM)
8482 src1 = force_reg (mode, src1);
8483 if (GET_CODE (src2) == MEM)
8484 src2 = force_reg (mode, src2);
8487 /* Emit the instruction. */
8489 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8490 if (reload_in_progress)
8492 /* Reload doesn't know about the flags register, and doesn't know that
8493 it doesn't want to clobber it. We can only do this with PLUS. */
8500 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8501 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8504 /* Fix up the destination if needed. */
8505 if (dst != operands[0])
8506 emit_move_insn (operands[0], dst);
8509 /* Return TRUE or FALSE depending on whether the binary operator meets the
8510 appropriate constraints. */
8513 ix86_binary_operator_ok (code, mode, operands)
8515 enum machine_mode mode ATTRIBUTE_UNUSED;
8518 /* Both source operands cannot be in memory. */
8519 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8521 /* If the operation is not commutable, source 1 cannot be a constant. */
8522 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
8524 /* If the destination is memory, we must have a matching source operand. */
8525 if (GET_CODE (operands[0]) == MEM
8526 && ! (rtx_equal_p (operands[0], operands[1])
8527 || (GET_RTX_CLASS (code) == 'c'
8528 && rtx_equal_p (operands[0], operands[2]))))
8530 /* If the operation is not commutable and the source 1 is memory, we must
8531 have a matching destination. */
8532 if (GET_CODE (operands[1]) == MEM
8533 && GET_RTX_CLASS (code) != 'c'
8534 && ! rtx_equal_p (operands[0], operands[1]))
8539 /* Attempt to expand a unary operator. Make the expansion closer to the
8540 actual machine, then just general_operand, which will allow 2 separate
8541 memory references (one output, one input) in a single insn. */
8544 ix86_expand_unary_operator (code, mode, operands)
8546 enum machine_mode mode;
8549 int matching_memory;
8550 rtx src, dst, op, clob;
8555 /* If the destination is memory, and we do not have matching source
8556 operands, do things in registers. */
8557 matching_memory = 0;
8558 if (GET_CODE (dst) == MEM)
8560 if (rtx_equal_p (dst, src))
8561 matching_memory = 1;
8563 dst = gen_reg_rtx (mode);
8566 /* When source operand is memory, destination must match. */
8567 if (!matching_memory && GET_CODE (src) == MEM)
8568 src = force_reg (mode, src);
8570 /* If optimizing, copy to regs to improve CSE */
8571 if (optimize && ! no_new_pseudos)
8573 if (GET_CODE (dst) == MEM)
8574 dst = gen_reg_rtx (mode);
8575 if (GET_CODE (src) == MEM)
8576 src = force_reg (mode, src);
8579 /* Emit the instruction. */
8581 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8582 if (reload_in_progress || code == NOT)
8584 /* Reload doesn't know about the flags register, and doesn't know that
8585 it doesn't want to clobber it. */
8592 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8593 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8596 /* Fix up the destination if needed. */
8597 if (dst != operands[0])
8598 emit_move_insn (operands[0], dst);
8601 /* Return TRUE or FALSE depending on whether the unary operator meets the
8602 appropriate constraints. */
8605 ix86_unary_operator_ok (code, mode, operands)
8606 enum rtx_code code ATTRIBUTE_UNUSED;
8607 enum machine_mode mode ATTRIBUTE_UNUSED;
8608 rtx operands[2] ATTRIBUTE_UNUSED;
8610 /* If one of operands is memory, source and destination must match. */
8611 if ((GET_CODE (operands[0]) == MEM
8612 || GET_CODE (operands[1]) == MEM)
8613 && ! rtx_equal_p (operands[0], operands[1]))
8618 /* Return TRUE or FALSE depending on whether the first SET in INSN
8619 has source and destination with matching CC modes, and that the
8620 CC mode is at least as constrained as REQ_MODE. */
8623 ix86_match_ccmode (insn, req_mode)
8625 enum machine_mode req_mode;
8628 enum machine_mode set_mode;
8630 set = PATTERN (insn);
8631 if (GET_CODE (set) == PARALLEL)
8632 set = XVECEXP (set, 0, 0);
8633 if (GET_CODE (set) != SET)
8635 if (GET_CODE (SET_SRC (set)) != COMPARE)
8638 set_mode = GET_MODE (SET_DEST (set));
8642 if (req_mode != CCNOmode
8643 && (req_mode != CCmode
8644 || XEXP (SET_SRC (set), 1) != const0_rtx))
8648 if (req_mode == CCGCmode)
8652 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8656 if (req_mode == CCZmode)
8666 return (GET_MODE (SET_SRC (set)) == set_mode);
8669 /* Generate insn patterns to do an integer compare of OPERANDS. */
8672 ix86_expand_int_compare (code, op0, op1)
8676 enum machine_mode cmpmode;
8679 cmpmode = SELECT_CC_MODE (code, op0, op1);
8680 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8682 /* This is very simple, but making the interface the same as in the
8683 FP case makes the rest of the code easier. */
8684 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8685 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8687 /* Return the test that should be put into the flags user, i.e.
8688 the bcc, scc, or cmov instruction. */
8689 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8692 /* Figure out whether to use ordered or unordered fp comparisons.
8693 Return the appropriate mode to use. */
8696 ix86_fp_compare_mode (code)
8697 enum rtx_code code ATTRIBUTE_UNUSED;
8699 /* ??? In order to make all comparisons reversible, we do all comparisons
8700 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8701 all forms trapping and nontrapping comparisons, we can make inequality
8702 comparisons trapping again, since it results in better code when using
8703 FCOM based compares. */
8704 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8708 ix86_cc_mode (code, op0, op1)
8712 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8713 return ix86_fp_compare_mode (code);
8716 /* Only zero flag is needed. */
8718 case NE: /* ZF!=0 */
8720 /* Codes needing carry flag. */
8721 case GEU: /* CF=0 */
8722 case GTU: /* CF=0 & ZF=0 */
8723 case LTU: /* CF=1 */
8724 case LEU: /* CF=1 | ZF=1 */
8726 /* Codes possibly doable only with sign flag when
8727 comparing against zero. */
8728 case GE: /* SF=OF or SF=0 */
8729 case LT: /* SF<>OF or SF=1 */
8730 if (op1 == const0_rtx)
8733 /* For other cases Carry flag is not required. */
8735 /* Codes doable only with sign flag when comparing
8736 against zero, but we miss jump instruction for it
8737 so we need to use relational tests against overflow
8738 that thus needs to be zero. */
8739 case GT: /* ZF=0 & SF=OF */
8740 case LE: /* ZF=1 | SF<>OF */
8741 if (op1 == const0_rtx)
8745 /* strcmp pattern do (use flags) and combine may ask us for proper
8754 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8757 ix86_use_fcomi_compare (code)
8758 enum rtx_code code ATTRIBUTE_UNUSED;
8760 enum rtx_code swapped_code = swap_condition (code);
8761 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8762 || (ix86_fp_comparison_cost (swapped_code)
8763 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8766 /* Swap, force into registers, or otherwise massage the two operands
8767 to a fp comparison. The operands are updated in place; the new
8768 comparison code is returned. */
8770 static enum rtx_code
8771 ix86_prepare_fp_compare_args (code, pop0, pop1)
8775 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8776 rtx op0 = *pop0, op1 = *pop1;
8777 enum machine_mode op_mode = GET_MODE (op0);
8778 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8780 /* All of the unordered compare instructions only work on registers.
8781 The same is true of the XFmode compare instructions. The same is
8782 true of the fcomi compare instructions. */
8785 && (fpcmp_mode == CCFPUmode
8786 || op_mode == XFmode
8787 || op_mode == TFmode
8788 || ix86_use_fcomi_compare (code)))
8790 op0 = force_reg (op_mode, op0);
8791 op1 = force_reg (op_mode, op1);
8795 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8796 things around if they appear profitable, otherwise force op0
8799 if (standard_80387_constant_p (op0) == 0
8800 || (GET_CODE (op0) == MEM
8801 && ! (standard_80387_constant_p (op1) == 0
8802 || GET_CODE (op1) == MEM)))
8805 tmp = op0, op0 = op1, op1 = tmp;
8806 code = swap_condition (code);
8809 if (GET_CODE (op0) != REG)
8810 op0 = force_reg (op_mode, op0);
8812 if (CONSTANT_P (op1))
8814 if (standard_80387_constant_p (op1))
8815 op1 = force_reg (op_mode, op1);
8817 op1 = validize_mem (force_const_mem (op_mode, op1));
8821 /* Try to rearrange the comparison to make it cheaper. */
8822 if (ix86_fp_comparison_cost (code)
8823 > ix86_fp_comparison_cost (swap_condition (code))
8824 && (GET_CODE (op1) == REG || !no_new_pseudos))
8827 tmp = op0, op0 = op1, op1 = tmp;
8828 code = swap_condition (code);
8829 if (GET_CODE (op0) != REG)
8830 op0 = force_reg (op_mode, op0);
8838 /* Convert comparison codes we use to represent FP comparison to integer
8839 code that will result in proper branch. Return UNKNOWN if no such code
8841 static enum rtx_code
8842 ix86_fp_compare_code_to_integer (code)
8872 /* Split comparison code CODE into comparisons we can do using branch
8873 instructions. BYPASS_CODE is comparison code for branch that will
8874 branch around FIRST_CODE and SECOND_CODE. If some of branches
8875 is not required, set value to NIL.
8876 We never require more than two branches. */
8878 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
8879 enum rtx_code code, *bypass_code, *first_code, *second_code;
8885 /* The fcomi comparison sets flags as follows:
8895 case GT: /* GTU - CF=0 & ZF=0 */
8896 case GE: /* GEU - CF=0 */
8897 case ORDERED: /* PF=0 */
8898 case UNORDERED: /* PF=1 */
8899 case UNEQ: /* EQ - ZF=1 */
8900 case UNLT: /* LTU - CF=1 */
8901 case UNLE: /* LEU - CF=1 | ZF=1 */
8902 case LTGT: /* EQ - ZF=0 */
8904 case LT: /* LTU - CF=1 - fails on unordered */
8906 *bypass_code = UNORDERED;
8908 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8910 *bypass_code = UNORDERED;
8912 case EQ: /* EQ - ZF=1 - fails on unordered */
8914 *bypass_code = UNORDERED;
8916 case NE: /* NE - ZF=0 - fails on unordered */
8918 *second_code = UNORDERED;
8920 case UNGE: /* GEU - CF=0 - fails on unordered */
8922 *second_code = UNORDERED;
8924 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8926 *second_code = UNORDERED;
8931 if (!TARGET_IEEE_FP)
8938 /* Return cost of comparison done fcom + arithmetics operations on AX.
8939 All following functions do use number of instructions as a cost metrics.
8940 In future this should be tweaked to compute bytes for optimize_size and
8941 take into account performance of various instructions on various CPUs. */
8943 ix86_fp_comparison_arithmetics_cost (code)
8946 if (!TARGET_IEEE_FP)
8948 /* The cost of code output by ix86_expand_fp_compare. */
8976 /* Return cost of comparison done using fcomi operation.
8977 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8979 ix86_fp_comparison_fcomi_cost (code)
8982 enum rtx_code bypass_code, first_code, second_code;
8983 /* Return arbitrarily high cost when instruction is not supported - this
8984 prevents gcc from using it. */
8987 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8988 return (bypass_code != NIL || second_code != NIL) + 2;
8991 /* Return cost of comparison done using sahf operation.
8992 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8994 ix86_fp_comparison_sahf_cost (code)
8997 enum rtx_code bypass_code, first_code, second_code;
8998 /* Return arbitrarily high cost when instruction is not preferred - this
8999 avoids gcc from using it. */
9000 if (!TARGET_USE_SAHF && !optimize_size)
9002 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9003 return (bypass_code != NIL || second_code != NIL) + 3;
9006 /* Compute cost of the comparison done using any method.
9007 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9009 ix86_fp_comparison_cost (code)
9012 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
9015 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
9016 sahf_cost = ix86_fp_comparison_sahf_cost (code);
9018 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
9019 if (min > sahf_cost)
9021 if (min > fcomi_cost)
9026 /* Generate insn patterns to do a floating point compare of OPERANDS. */
9029 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
9031 rtx op0, op1, scratch;
9035 enum machine_mode fpcmp_mode, intcmp_mode;
9037 int cost = ix86_fp_comparison_cost (code);
9038 enum rtx_code bypass_code, first_code, second_code;
9040 fpcmp_mode = ix86_fp_compare_mode (code);
9041 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
9044 *second_test = NULL_RTX;
9046 *bypass_test = NULL_RTX;
9048 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9050 /* Do fcomi/sahf based test when profitable. */
9051 if ((bypass_code == NIL || bypass_test)
9052 && (second_code == NIL || second_test)
9053 && ix86_fp_comparison_arithmetics_cost (code) > cost)
9057 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9058 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
9064 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9065 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9067 scratch = gen_reg_rtx (HImode);
9068 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9069 emit_insn (gen_x86_sahf_1 (scratch));
9072 /* The FP codes work out to act like unsigned. */
9073 intcmp_mode = fpcmp_mode;
9075 if (bypass_code != NIL)
9076 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
9077 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9079 if (second_code != NIL)
9080 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
9081 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9086 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
9087 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9088 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9090 scratch = gen_reg_rtx (HImode);
9091 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9093 /* In the unordered case, we have to check C2 for NaN's, which
9094 doesn't happen to work out to anything nice combination-wise.
9095 So do some bit twiddling on the value we've got in AH to come
9096 up with an appropriate set of condition codes. */
9098 intcmp_mode = CCNOmode;
9103 if (code == GT || !TARGET_IEEE_FP)
9105 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9110 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9111 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9112 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
9113 intcmp_mode = CCmode;
9119 if (code == LT && TARGET_IEEE_FP)
9121 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9122 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
9123 intcmp_mode = CCmode;
9128 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
9134 if (code == GE || !TARGET_IEEE_FP)
9136 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
9141 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9142 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9149 if (code == LE && TARGET_IEEE_FP)
9151 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9152 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9153 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9154 intcmp_mode = CCmode;
9159 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9165 if (code == EQ && TARGET_IEEE_FP)
9167 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9168 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9169 intcmp_mode = CCmode;
9174 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9181 if (code == NE && TARGET_IEEE_FP)
9183 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9184 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9190 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9196 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9200 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9209 /* Return the test that should be put into the flags user, i.e.
9210 the bcc, scc, or cmov instruction. */
9211 return gen_rtx_fmt_ee (code, VOIDmode,
9212 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9217 ix86_expand_compare (code, second_test, bypass_test)
9219 rtx *second_test, *bypass_test;
9222 op0 = ix86_compare_op0;
9223 op1 = ix86_compare_op1;
9226 *second_test = NULL_RTX;
9228 *bypass_test = NULL_RTX;
9230 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9231 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9232 second_test, bypass_test);
9234 ret = ix86_expand_int_compare (code, op0, op1);
9239 /* Return true if the CODE will result in nontrivial jump sequence. */
9241 ix86_fp_jump_nontrivial_p (code)
9244 enum rtx_code bypass_code, first_code, second_code;
9247 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9248 return bypass_code != NIL || second_code != NIL;
9252 ix86_expand_branch (code, label)
9258 switch (GET_MODE (ix86_compare_op0))
9264 tmp = ix86_expand_compare (code, NULL, NULL);
9265 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9266 gen_rtx_LABEL_REF (VOIDmode, label),
9268 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9278 enum rtx_code bypass_code, first_code, second_code;
9280 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9283 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9285 /* Check whether we will use the natural sequence with one jump. If
9286 so, we can expand jump early. Otherwise delay expansion by
9287 creating compound insn to not confuse optimizers. */
9288 if (bypass_code == NIL && second_code == NIL
9291 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9292 gen_rtx_LABEL_REF (VOIDmode, label),
9297 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9298 ix86_compare_op0, ix86_compare_op1);
9299 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9300 gen_rtx_LABEL_REF (VOIDmode, label),
9302 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9304 use_fcomi = ix86_use_fcomi_compare (code);
9305 vec = rtvec_alloc (3 + !use_fcomi);
9306 RTVEC_ELT (vec, 0) = tmp;
9308 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9310 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9313 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9315 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9323 /* Expand DImode branch into multiple compare+branch. */
9325 rtx lo[2], hi[2], label2;
9326 enum rtx_code code1, code2, code3;
9328 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9330 tmp = ix86_compare_op0;
9331 ix86_compare_op0 = ix86_compare_op1;
9332 ix86_compare_op1 = tmp;
9333 code = swap_condition (code);
9335 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9336 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9338 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9339 avoid two branches. This costs one extra insn, so disable when
9340 optimizing for size. */
9342 if ((code == EQ || code == NE)
9344 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9349 if (hi[1] != const0_rtx)
9350 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9351 NULL_RTX, 0, OPTAB_WIDEN);
9354 if (lo[1] != const0_rtx)
9355 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9356 NULL_RTX, 0, OPTAB_WIDEN);
9358 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9359 NULL_RTX, 0, OPTAB_WIDEN);
9361 ix86_compare_op0 = tmp;
9362 ix86_compare_op1 = const0_rtx;
9363 ix86_expand_branch (code, label);
9367 /* Otherwise, if we are doing less-than or greater-or-equal-than,
9368 op1 is a constant and the low word is zero, then we can just
9369 examine the high word. */
9371 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9374 case LT: case LTU: case GE: case GEU:
9375 ix86_compare_op0 = hi[0];
9376 ix86_compare_op1 = hi[1];
9377 ix86_expand_branch (code, label);
9383 /* Otherwise, we need two or three jumps. */
9385 label2 = gen_label_rtx ();
9388 code2 = swap_condition (code);
9389 code3 = unsigned_condition (code);
9393 case LT: case GT: case LTU: case GTU:
9396 case LE: code1 = LT; code2 = GT; break;
9397 case GE: code1 = GT; code2 = LT; break;
9398 case LEU: code1 = LTU; code2 = GTU; break;
9399 case GEU: code1 = GTU; code2 = LTU; break;
9401 case EQ: code1 = NIL; code2 = NE; break;
9402 case NE: code2 = NIL; break;
9410 * if (hi(a) < hi(b)) goto true;
9411 * if (hi(a) > hi(b)) goto false;
9412 * if (lo(a) < lo(b)) goto true;
9416 ix86_compare_op0 = hi[0];
9417 ix86_compare_op1 = hi[1];
9420 ix86_expand_branch (code1, label);
9422 ix86_expand_branch (code2, label2);
9424 ix86_compare_op0 = lo[0];
9425 ix86_compare_op1 = lo[1];
9426 ix86_expand_branch (code3, label);
9429 emit_label (label2);
9438 /* Split branch based on floating point condition. */
9440 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
9442 rtx op1, op2, target1, target2, tmp;
9445 rtx label = NULL_RTX;
9447 int bypass_probability = -1, second_probability = -1, probability = -1;
9450 if (target2 != pc_rtx)
9453 code = reverse_condition_maybe_unordered (code);
9458 condition = ix86_expand_fp_compare (code, op1, op2,
9459 tmp, &second, &bypass);
9461 if (split_branch_probability >= 0)
9463 /* Distribute the probabilities across the jumps.
9464 Assume the BYPASS and SECOND to be always test
9466 probability = split_branch_probability;
9468 /* Value of 1 is low enough to make no need for probability
9469 to be updated. Later we may run some experiments and see
9470 if unordered values are more frequent in practice. */
9472 bypass_probability = 1;
9474 second_probability = 1;
9476 if (bypass != NULL_RTX)
9478 label = gen_label_rtx ();
9479 i = emit_jump_insn (gen_rtx_SET
9481 gen_rtx_IF_THEN_ELSE (VOIDmode,
9483 gen_rtx_LABEL_REF (VOIDmode,
9486 if (bypass_probability >= 0)
9488 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9489 GEN_INT (bypass_probability),
9492 i = emit_jump_insn (gen_rtx_SET
9494 gen_rtx_IF_THEN_ELSE (VOIDmode,
9495 condition, target1, target2)));
9496 if (probability >= 0)
9498 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9499 GEN_INT (probability),
9501 if (second != NULL_RTX)
9503 i = emit_jump_insn (gen_rtx_SET
9505 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9507 if (second_probability >= 0)
9509 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9510 GEN_INT (second_probability),
9513 if (label != NULL_RTX)
9518 ix86_expand_setcc (code, dest)
9522 rtx ret, tmp, tmpreg;
9523 rtx second_test, bypass_test;
9525 if (GET_MODE (ix86_compare_op0) == DImode
9527 return 0; /* FAIL */
9529 if (GET_MODE (dest) != QImode)
9532 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9533 PUT_MODE (ret, QImode);
9538 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9539 if (bypass_test || second_test)
9541 rtx test = second_test;
9543 rtx tmp2 = gen_reg_rtx (QImode);
9550 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9552 PUT_MODE (test, QImode);
9553 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9556 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9558 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9561 return 1; /* DONE */
9564 /* Expand comparison setting or clearing carry flag. Return true when successful
9565 and set pop for the operation. */
9567 ix86_expand_carry_flag_compare (code, op0, op1, pop)
9571 enum machine_mode mode =
9572 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9574 /* Do not handle DImode compares that go trought special path. Also we can't
9575 deal with FP compares yet. This is possible to add. */
9576 if ((mode == DImode && !TARGET_64BIT))
9578 if (FLOAT_MODE_P (mode))
9580 rtx second_test = NULL, bypass_test = NULL;
9581 rtx compare_op, compare_seq;
9583 /* Shortcut: following common codes never translate into carry flag compares. */
9584 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9585 || code == ORDERED || code == UNORDERED)
9588 /* These comparisons require zero flag; swap operands so they won't. */
9589 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9595 code = swap_condition (code);
9598 /* Try to expand the comparsion and verify that we end up with carry flag
9599 based comparsion. This is fails to be true only when we decide to expand
9600 comparsion using arithmetic that is not too common scenario. */
9602 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9603 &second_test, &bypass_test);
9604 compare_seq = get_insns ();
9607 if (second_test || bypass_test)
9609 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9610 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9611 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9613 code = GET_CODE (compare_op);
9614 if (code != LTU && code != GEU)
9616 emit_insn (compare_seq);
9620 if (!INTEGRAL_MODE_P (mode))
9628 /* Convert a==0 into (unsigned)a<1. */
9631 if (op1 != const0_rtx)
9634 code = (code == EQ ? LTU : GEU);
9637 /* Convert a>b into b<a or a>=b-1. */
9640 if (GET_CODE (op1) == CONST_INT)
9642 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9643 /* Bail out on overflow. We still can swap operands but that
9644 would force loading of the constant into register. */
9645 if (op1 == const0_rtx
9646 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9648 code = (code == GTU ? GEU : LTU);
9655 code = (code == GTU ? LTU : GEU);
9659 /* Convert a>0 into (unsigned)a<0x7fffffff. */
9662 if (mode == DImode || op1 != const0_rtx)
9664 op1 = gen_int_mode (~(1 << (GET_MODE_BITSIZE (mode) - 1)), mode);
9665 code = (code == LT ? GEU : LTU);
9669 if (mode == DImode || op1 != constm1_rtx)
9671 op1 = gen_int_mode (~(1 << (GET_MODE_BITSIZE (mode) - 1)), mode);
9672 code = (code == LE ? GEU : LTU);
9678 ix86_compare_op0 = op0;
9679 ix86_compare_op1 = op1;
9680 *pop = ix86_expand_compare (code, NULL, NULL);
9681 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9687 ix86_expand_int_movcc (operands)
9690 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9691 rtx compare_seq, compare_op;
9692 rtx second_test, bypass_test;
9693 enum machine_mode mode = GET_MODE (operands[0]);
9694 bool sign_bit_compare_p = false;;
9697 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9698 compare_seq = get_insns ();
9701 compare_code = GET_CODE (compare_op);
9703 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9704 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9705 sign_bit_compare_p = true;
9707 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9708 HImode insns, we'd be swallowed in word prefix ops. */
9710 if ((mode != HImode || TARGET_FAST_PREFIX)
9711 && (mode != DImode || TARGET_64BIT)
9712 && GET_CODE (operands[2]) == CONST_INT
9713 && GET_CODE (operands[3]) == CONST_INT)
9715 rtx out = operands[0];
9716 HOST_WIDE_INT ct = INTVAL (operands[2]);
9717 HOST_WIDE_INT cf = INTVAL (operands[3]);
9721 /* Sign bit compares are better done using shifts than we do by using
9723 if (sign_bit_compare_p
9724 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9725 ix86_compare_op1, &compare_op))
9727 /* Detect overlap between destination and compare sources. */
9730 if (!sign_bit_compare_p)
9734 compare_code = GET_CODE (compare_op);
9736 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9737 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9740 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9743 /* To simplify rest of code, restrict to the GEU case. */
9744 if (compare_code == LTU)
9746 HOST_WIDE_INT tmp = ct;
9749 compare_code = reverse_condition (compare_code);
9750 code = reverse_condition (code);
9755 PUT_CODE (compare_op,
9756 reverse_condition_maybe_unordered
9757 (GET_CODE (compare_op)));
9759 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9763 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9764 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9765 tmp = gen_reg_rtx (mode);
9768 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9770 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9774 if (code == GT || code == GE)
9775 code = reverse_condition (code);
9778 HOST_WIDE_INT tmp = ct;
9783 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9784 ix86_compare_op1, VOIDmode, 0, -1);
9797 tmp = expand_simple_binop (mode, PLUS,
9799 copy_rtx (tmp), 1, OPTAB_DIRECT);
9810 tmp = expand_simple_binop (mode, IOR,
9812 copy_rtx (tmp), 1, OPTAB_DIRECT);
9814 else if (diff == -1 && ct)
9824 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9826 tmp = expand_simple_binop (mode, PLUS,
9827 copy_rtx (tmp), GEN_INT (cf),
9828 copy_rtx (tmp), 1, OPTAB_DIRECT);
9836 * andl cf - ct, dest
9846 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9849 tmp = expand_simple_binop (mode, AND,
9851 gen_int_mode (cf - ct, mode),
9852 copy_rtx (tmp), 1, OPTAB_DIRECT);
9854 tmp = expand_simple_binop (mode, PLUS,
9855 copy_rtx (tmp), GEN_INT (ct),
9856 copy_rtx (tmp), 1, OPTAB_DIRECT);
9859 if (!rtx_equal_p (tmp, out))
9860 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9862 return 1; /* DONE */
9868 tmp = ct, ct = cf, cf = tmp;
9870 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9872 /* We may be reversing unordered compare to normal compare, that
9873 is not valid in general (we may convert non-trapping condition
9874 to trapping one), however on i386 we currently emit all
9875 comparisons unordered. */
9876 compare_code = reverse_condition_maybe_unordered (compare_code);
9877 code = reverse_condition_maybe_unordered (code);
9881 compare_code = reverse_condition (compare_code);
9882 code = reverse_condition (code);
9887 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9888 && GET_CODE (ix86_compare_op1) == CONST_INT)
9890 if (ix86_compare_op1 == const0_rtx
9891 && (code == LT || code == GE))
9892 compare_code = code;
9893 else if (ix86_compare_op1 == constm1_rtx)
9897 else if (code == GT)
9902 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9903 if (compare_code != NIL
9904 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9905 && (cf == -1 || ct == -1))
9907 /* If lea code below could be used, only optimize
9908 if it results in a 2 insn sequence. */
9910 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9911 || diff == 3 || diff == 5 || diff == 9)
9912 || (compare_code == LT && ct == -1)
9913 || (compare_code == GE && cf == -1))
9916 * notl op1 (if necessary)
9924 code = reverse_condition (code);
9927 out = emit_store_flag (out, code, ix86_compare_op0,
9928 ix86_compare_op1, VOIDmode, 0, -1);
9930 out = expand_simple_binop (mode, IOR,
9932 out, 1, OPTAB_DIRECT);
9933 if (out != operands[0])
9934 emit_move_insn (operands[0], out);
9936 return 1; /* DONE */
9941 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9942 || diff == 3 || diff == 5 || diff == 9)
9943 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9944 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
9950 * lea cf(dest*(ct-cf)),dest
9954 * This also catches the degenerate setcc-only case.
9960 out = emit_store_flag (out, code, ix86_compare_op0,
9961 ix86_compare_op1, VOIDmode, 0, 1);
9964 /* On x86_64 the lea instruction operates on Pmode, so we need
9965 to get arithmetics done in proper mode to match. */
9967 tmp = copy_rtx (out);
9971 out1 = copy_rtx (out);
9972 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9976 tmp = gen_rtx_PLUS (mode, tmp, out1);
9982 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9985 if (!rtx_equal_p (tmp, out))
9988 out = force_operand (tmp, copy_rtx (out));
9990 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
9992 if (!rtx_equal_p (out, operands[0]))
9993 emit_move_insn (operands[0], copy_rtx (out));
9995 return 1; /* DONE */
9999 * General case: Jumpful:
10000 * xorl dest,dest cmpl op1, op2
10001 * cmpl op1, op2 movl ct, dest
10002 * setcc dest jcc 1f
10003 * decl dest movl cf, dest
10004 * andl (cf-ct),dest 1:
10007 * Size 20. Size 14.
10009 * This is reasonably steep, but branch mispredict costs are
10010 * high on modern cpus, so consider failing only if optimizing
10014 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10015 && BRANCH_COST >= 2)
10021 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10022 /* We may be reversing unordered compare to normal compare,
10023 that is not valid in general (we may convert non-trapping
10024 condition to trapping one), however on i386 we currently
10025 emit all comparisons unordered. */
10026 code = reverse_condition_maybe_unordered (code);
10029 code = reverse_condition (code);
10030 if (compare_code != NIL)
10031 compare_code = reverse_condition (compare_code);
10035 if (compare_code != NIL)
10037 /* notl op1 (if needed)
10042 For x < 0 (resp. x <= -1) there will be no notl,
10043 so if possible swap the constants to get rid of the
10045 True/false will be -1/0 while code below (store flag
10046 followed by decrement) is 0/-1, so the constants need
10047 to be exchanged once more. */
10049 if (compare_code == GE || !cf)
10051 code = reverse_condition (code);
10056 HOST_WIDE_INT tmp = cf;
10061 out = emit_store_flag (out, code, ix86_compare_op0,
10062 ix86_compare_op1, VOIDmode, 0, -1);
10066 out = emit_store_flag (out, code, ix86_compare_op0,
10067 ix86_compare_op1, VOIDmode, 0, 1);
10069 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
10070 copy_rtx (out), 1, OPTAB_DIRECT);
10073 out = expand_simple_binop (mode, AND, copy_rtx (out),
10074 gen_int_mode (cf - ct, mode),
10075 copy_rtx (out), 1, OPTAB_DIRECT);
10077 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
10078 copy_rtx (out), 1, OPTAB_DIRECT);
10079 if (!rtx_equal_p (out, operands[0]))
10080 emit_move_insn (operands[0], copy_rtx (out));
10082 return 1; /* DONE */
10086 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10088 /* Try a few things more with specific constants and a variable. */
10091 rtx var, orig_out, out, tmp;
10093 if (BRANCH_COST <= 2)
10094 return 0; /* FAIL */
10096 /* If one of the two operands is an interesting constant, load a
10097 constant with the above and mask it in with a logical operation. */
10099 if (GET_CODE (operands[2]) == CONST_INT)
10102 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
10103 operands[3] = constm1_rtx, op = and_optab;
10104 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
10105 operands[3] = const0_rtx, op = ior_optab;
10107 return 0; /* FAIL */
10109 else if (GET_CODE (operands[3]) == CONST_INT)
10112 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
10113 operands[2] = constm1_rtx, op = and_optab;
10114 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
10115 operands[2] = const0_rtx, op = ior_optab;
10117 return 0; /* FAIL */
10120 return 0; /* FAIL */
10122 orig_out = operands[0];
10123 tmp = gen_reg_rtx (mode);
10126 /* Recurse to get the constant loaded. */
10127 if (ix86_expand_int_movcc (operands) == 0)
10128 return 0; /* FAIL */
10130 /* Mask in the interesting variable. */
10131 out = expand_binop (mode, op, var, tmp, orig_out, 0,
10133 if (!rtx_equal_p (out, orig_out))
10134 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
10136 return 1; /* DONE */
10140 * For comparison with above,
10150 if (! nonimmediate_operand (operands[2], mode))
10151 operands[2] = force_reg (mode, operands[2]);
10152 if (! nonimmediate_operand (operands[3], mode))
10153 operands[3] = force_reg (mode, operands[3]);
10155 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10157 rtx tmp = gen_reg_rtx (mode);
10158 emit_move_insn (tmp, operands[3]);
10161 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10163 rtx tmp = gen_reg_rtx (mode);
10164 emit_move_insn (tmp, operands[2]);
10168 if (! register_operand (operands[2], VOIDmode)
10170 || ! register_operand (operands[3], VOIDmode)))
10171 operands[2] = force_reg (mode, operands[2]);
10174 && ! register_operand (operands[3], VOIDmode))
10175 operands[3] = force_reg (mode, operands[3]);
10177 emit_insn (compare_seq);
10178 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10179 gen_rtx_IF_THEN_ELSE (mode,
10180 compare_op, operands[2],
10183 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10184 gen_rtx_IF_THEN_ELSE (mode,
10186 copy_rtx (operands[3]),
10187 copy_rtx (operands[0]))));
10189 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10190 gen_rtx_IF_THEN_ELSE (mode,
10192 copy_rtx (operands[2]),
10193 copy_rtx (operands[0]))));
10195 return 1; /* DONE */
10199 ix86_expand_fp_movcc (operands)
10202 enum rtx_code code;
10204 rtx compare_op, second_test, bypass_test;
10206 /* For SF/DFmode conditional moves based on comparisons
10207 in same mode, we may want to use SSE min/max instructions. */
10208 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
10209 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
10210 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
10211 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
10212 && (!TARGET_IEEE_FP
10213 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
10214 /* We may be called from the post-reload splitter. */
10215 && (!REG_P (operands[0])
10216 || SSE_REG_P (operands[0])
10217 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
10219 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
10220 code = GET_CODE (operands[1]);
10222 /* See if we have (cross) match between comparison operands and
10223 conditional move operands. */
10224 if (rtx_equal_p (operands[2], op1))
10229 code = reverse_condition_maybe_unordered (code);
10231 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
10233 /* Check for min operation. */
10234 if (code == LT || code == UNLE)
10242 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10243 if (memory_operand (op0, VOIDmode))
10244 op0 = force_reg (GET_MODE (operands[0]), op0);
10245 if (GET_MODE (operands[0]) == SFmode)
10246 emit_insn (gen_minsf3 (operands[0], op0, op1));
10248 emit_insn (gen_mindf3 (operands[0], op0, op1));
10251 /* Check for max operation. */
10252 if (code == GT || code == UNGE)
10260 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10261 if (memory_operand (op0, VOIDmode))
10262 op0 = force_reg (GET_MODE (operands[0]), op0);
10263 if (GET_MODE (operands[0]) == SFmode)
10264 emit_insn (gen_maxsf3 (operands[0], op0, op1));
10266 emit_insn (gen_maxdf3 (operands[0], op0, op1));
10270 /* Manage condition to be sse_comparison_operator. In case we are
10271 in non-ieee mode, try to canonicalize the destination operand
10272 to be first in the comparison - this helps reload to avoid extra
10274 if (!sse_comparison_operator (operands[1], VOIDmode)
10275 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
10277 rtx tmp = ix86_compare_op0;
10278 ix86_compare_op0 = ix86_compare_op1;
10279 ix86_compare_op1 = tmp;
10280 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
10281 VOIDmode, ix86_compare_op0,
10284 /* Similarly try to manage result to be first operand of conditional
10285 move. We also don't support the NE comparison on SSE, so try to
10287 if ((rtx_equal_p (operands[0], operands[3])
10288 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
10289 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
10291 rtx tmp = operands[2];
10292 operands[2] = operands[3];
10294 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10295 (GET_CODE (operands[1])),
10296 VOIDmode, ix86_compare_op0,
10299 if (GET_MODE (operands[0]) == SFmode)
10300 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
10301 operands[2], operands[3],
10302 ix86_compare_op0, ix86_compare_op1));
10304 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
10305 operands[2], operands[3],
10306 ix86_compare_op0, ix86_compare_op1));
10310 /* The floating point conditional move instructions don't directly
10311 support conditions resulting from a signed integer comparison. */
10313 code = GET_CODE (operands[1]);
10314 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10316 /* The floating point conditional move instructions don't directly
10317 support signed integer comparisons. */
10319 if (!fcmov_comparison_operator (compare_op, VOIDmode))
10321 if (second_test != NULL || bypass_test != NULL)
10323 tmp = gen_reg_rtx (QImode);
10324 ix86_expand_setcc (code, tmp);
10326 ix86_compare_op0 = tmp;
10327 ix86_compare_op1 = const0_rtx;
10328 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10330 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10332 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10333 emit_move_insn (tmp, operands[3]);
10336 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10338 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10339 emit_move_insn (tmp, operands[2]);
10343 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10344 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10349 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10350 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10355 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10356 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10364 /* Expand conditional increment or decrement using adb/sbb instructions.
10365 The default case using setcc followed by the conditional move can be
10366 done by generic code. */
10368 ix86_expand_int_addcc (operands)
10371 enum rtx_code code = GET_CODE (operands[1]);
10373 rtx val = const0_rtx;
10374 bool fpcmp = false;
10375 enum machine_mode mode = GET_MODE (operands[0]);
10377 if (operands[3] != const1_rtx
10378 && operands[3] != constm1_rtx)
10380 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10381 ix86_compare_op1, &compare_op))
10383 code = GET_CODE (compare_op);
10385 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10386 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10389 code = ix86_fp_compare_code_to_integer (code);
10396 PUT_CODE (compare_op,
10397 reverse_condition_maybe_unordered
10398 (GET_CODE (compare_op)));
10400 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10402 PUT_MODE (compare_op, mode);
10404 /* Construct either adc or sbb insn. */
10405 if ((code == LTU) == (operands[3] == constm1_rtx))
10407 switch (GET_MODE (operands[0]))
10410 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10413 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10416 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10419 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10427 switch (GET_MODE (operands[0]))
10430 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10433 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10436 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10439 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10445 return 1; /* DONE */
10449 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10450 works for floating pointer parameters and nonoffsetable memories.
10451 For pushes, it returns just stack offsets; the values will be saved
10452 in the right order. Maximally three parts are generated. */
10455 ix86_split_to_parts (operand, parts, mode)
10458 enum machine_mode mode;
10463 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
10465 size = (GET_MODE_SIZE (mode) + 4) / 8;
10467 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10469 if (size < 2 || size > 3)
10472 /* Optimize constant pool reference to immediates. This is used by fp
10473 moves, that force all constants to memory to allow combining. */
10474 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10476 rtx tmp = maybe_get_pool_constant (operand);
10481 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10483 /* The only non-offsetable memories we handle are pushes. */
10484 if (! push_operand (operand, VOIDmode))
10487 operand = copy_rtx (operand);
10488 PUT_MODE (operand, Pmode);
10489 parts[0] = parts[1] = parts[2] = operand;
10491 else if (!TARGET_64BIT)
10493 if (mode == DImode)
10494 split_di (&operand, 1, &parts[0], &parts[1]);
10497 if (REG_P (operand))
10499 if (!reload_completed)
10501 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10502 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10504 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10506 else if (offsettable_memref_p (operand))
10508 operand = adjust_address (operand, SImode, 0);
10509 parts[0] = operand;
10510 parts[1] = adjust_address (operand, SImode, 4);
10512 parts[2] = adjust_address (operand, SImode, 8);
10514 else if (GET_CODE (operand) == CONST_DOUBLE)
10519 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10524 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10525 parts[2] = gen_int_mode (l[2], SImode);
10528 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10533 parts[1] = gen_int_mode (l[1], SImode);
10534 parts[0] = gen_int_mode (l[0], SImode);
10542 if (mode == TImode)
10543 split_ti (&operand, 1, &parts[0], &parts[1]);
10544 if (mode == XFmode || mode == TFmode)
10546 if (REG_P (operand))
10548 if (!reload_completed)
10550 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10551 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10553 else if (offsettable_memref_p (operand))
10555 operand = adjust_address (operand, DImode, 0);
10556 parts[0] = operand;
10557 parts[1] = adjust_address (operand, SImode, 8);
10559 else if (GET_CODE (operand) == CONST_DOUBLE)
10564 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10565 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10566 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10567 if (HOST_BITS_PER_WIDE_INT >= 64)
10570 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10571 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10574 parts[0] = immed_double_const (l[0], l[1], DImode);
10575 parts[1] = gen_int_mode (l[2], SImode);
10585 /* Emit insns to perform a move or push of DI, DF, and XF values.
10586 Return false when normal moves are needed; true when all required
10587 insns have been emitted. Operands 2-4 contain the input values
10588 int the correct order; operands 5-7 contain the output values. */
10591 ix86_split_long_move (operands)
10597 int collisions = 0;
10598 enum machine_mode mode = GET_MODE (operands[0]);
10600 /* The DFmode expanders may ask us to move double.
10601 For 64bit target this is single move. By hiding the fact
10602 here we simplify i386.md splitters. */
10603 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10605 /* Optimize constant pool reference to immediates. This is used by
10606 fp moves, that force all constants to memory to allow combining. */
10608 if (GET_CODE (operands[1]) == MEM
10609 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10610 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10611 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10612 if (push_operand (operands[0], VOIDmode))
10614 operands[0] = copy_rtx (operands[0]);
10615 PUT_MODE (operands[0], Pmode);
10618 operands[0] = gen_lowpart (DImode, operands[0]);
10619 operands[1] = gen_lowpart (DImode, operands[1]);
10620 emit_move_insn (operands[0], operands[1]);
10624 /* The only non-offsettable memory we handle is push. */
10625 if (push_operand (operands[0], VOIDmode))
10627 else if (GET_CODE (operands[0]) == MEM
10628 && ! offsettable_memref_p (operands[0]))
10631 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10632 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10634 /* When emitting push, take care for source operands on the stack. */
10635 if (push && GET_CODE (operands[1]) == MEM
10636 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10639 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10640 XEXP (part[1][2], 0));
10641 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10642 XEXP (part[1][1], 0));
10645 /* We need to do copy in the right order in case an address register
10646 of the source overlaps the destination. */
10647 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10649 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10651 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10654 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10657 /* Collision in the middle part can be handled by reordering. */
10658 if (collisions == 1 && nparts == 3
10659 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10662 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10663 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10666 /* If there are more collisions, we can't handle it by reordering.
10667 Do an lea to the last part and use only one colliding move. */
10668 else if (collisions > 1)
10674 base = part[0][nparts - 1];
10676 /* Handle the case when the last part isn't valid for lea.
10677 Happens in 64-bit mode storing the 12-byte XFmode. */
10678 if (GET_MODE (base) != Pmode)
10679 base = gen_rtx_REG (Pmode, REGNO (base));
10681 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10682 part[1][0] = replace_equiv_address (part[1][0], base);
10683 part[1][1] = replace_equiv_address (part[1][1],
10684 plus_constant (base, UNITS_PER_WORD));
10686 part[1][2] = replace_equiv_address (part[1][2],
10687 plus_constant (base, 8));
10697 /* We use only first 12 bytes of TFmode value, but for pushing we
10698 are required to adjust stack as if we were pushing real 16byte
10700 if (mode == TFmode && !TARGET_64BIT)
10701 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
10703 emit_move_insn (part[0][2], part[1][2]);
10708 /* In 64bit mode we don't have 32bit push available. In case this is
10709 register, it is OK - we will just use larger counterpart. We also
10710 retype memory - these comes from attempt to avoid REX prefix on
10711 moving of second half of TFmode value. */
10712 if (GET_MODE (part[1][1]) == SImode)
10714 if (GET_CODE (part[1][1]) == MEM)
10715 part[1][1] = adjust_address (part[1][1], DImode, 0);
10716 else if (REG_P (part[1][1]))
10717 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10720 if (GET_MODE (part[1][0]) == SImode)
10721 part[1][0] = part[1][1];
10724 emit_move_insn (part[0][1], part[1][1]);
10725 emit_move_insn (part[0][0], part[1][0]);
10729 /* Choose correct order to not overwrite the source before it is copied. */
10730 if ((REG_P (part[0][0])
10731 && REG_P (part[1][1])
10732 && (REGNO (part[0][0]) == REGNO (part[1][1])
10734 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10736 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10740 operands[2] = part[0][2];
10741 operands[3] = part[0][1];
10742 operands[4] = part[0][0];
10743 operands[5] = part[1][2];
10744 operands[6] = part[1][1];
10745 operands[7] = part[1][0];
10749 operands[2] = part[0][1];
10750 operands[3] = part[0][0];
10751 operands[5] = part[1][1];
10752 operands[6] = part[1][0];
10759 operands[2] = part[0][0];
10760 operands[3] = part[0][1];
10761 operands[4] = part[0][2];
10762 operands[5] = part[1][0];
10763 operands[6] = part[1][1];
10764 operands[7] = part[1][2];
10768 operands[2] = part[0][0];
10769 operands[3] = part[0][1];
10770 operands[5] = part[1][0];
10771 operands[6] = part[1][1];
10774 emit_move_insn (operands[2], operands[5]);
10775 emit_move_insn (operands[3], operands[6]);
10777 emit_move_insn (operands[4], operands[7]);
10783 ix86_split_ashldi (operands, scratch)
10784 rtx *operands, scratch;
10786 rtx low[2], high[2];
10789 if (GET_CODE (operands[2]) == CONST_INT)
10791 split_di (operands, 2, low, high);
10792 count = INTVAL (operands[2]) & 63;
10796 emit_move_insn (high[0], low[1]);
10797 emit_move_insn (low[0], const0_rtx);
10800 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10804 if (!rtx_equal_p (operands[0], operands[1]))
10805 emit_move_insn (operands[0], operands[1]);
10806 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10807 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10812 if (!rtx_equal_p (operands[0], operands[1]))
10813 emit_move_insn (operands[0], operands[1]);
10815 split_di (operands, 1, low, high);
10817 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10818 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10820 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10822 if (! no_new_pseudos)
10823 scratch = force_reg (SImode, const0_rtx);
10825 emit_move_insn (scratch, const0_rtx);
10827 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10831 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10836 ix86_split_ashrdi (operands, scratch)
10837 rtx *operands, scratch;
10839 rtx low[2], high[2];
10842 if (GET_CODE (operands[2]) == CONST_INT)
10844 split_di (operands, 2, low, high);
10845 count = INTVAL (operands[2]) & 63;
10849 emit_move_insn (low[0], high[1]);
10851 if (! reload_completed)
10852 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10855 emit_move_insn (high[0], low[0]);
10856 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10860 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10864 if (!rtx_equal_p (operands[0], operands[1]))
10865 emit_move_insn (operands[0], operands[1]);
10866 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10867 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10872 if (!rtx_equal_p (operands[0], operands[1]))
10873 emit_move_insn (operands[0], operands[1]);
10875 split_di (operands, 1, low, high);
10877 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10878 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10880 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10882 if (! no_new_pseudos)
10883 scratch = gen_reg_rtx (SImode);
10884 emit_move_insn (scratch, high[0]);
10885 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10886 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10890 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10895 ix86_split_lshrdi (operands, scratch)
10896 rtx *operands, scratch;
10898 rtx low[2], high[2];
10901 if (GET_CODE (operands[2]) == CONST_INT)
10903 split_di (operands, 2, low, high);
10904 count = INTVAL (operands[2]) & 63;
10908 emit_move_insn (low[0], high[1]);
10909 emit_move_insn (high[0], const0_rtx);
10912 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10916 if (!rtx_equal_p (operands[0], operands[1]))
10917 emit_move_insn (operands[0], operands[1]);
10918 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10919 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10924 if (!rtx_equal_p (operands[0], operands[1]))
10925 emit_move_insn (operands[0], operands[1]);
10927 split_di (operands, 1, low, high);
10929 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10930 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10932 /* Heh. By reversing the arguments, we can reuse this pattern. */
10933 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10935 if (! no_new_pseudos)
10936 scratch = force_reg (SImode, const0_rtx);
10938 emit_move_insn (scratch, const0_rtx);
10940 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10944 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10948 /* Helper function for the string operations below. Dest VARIABLE whether
10949 it is aligned to VALUE bytes. If true, jump to the label. */
10951 ix86_expand_aligntest (variable, value)
10955 rtx label = gen_label_rtx ();
10956 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10957 if (GET_MODE (variable) == DImode)
10958 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10960 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10961 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10966 /* Adjust COUNTER by the VALUE. */
10968 ix86_adjust_counter (countreg, value)
10970 HOST_WIDE_INT value;
10972 if (GET_MODE (countreg) == DImode)
10973 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10975 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10978 /* Zero extend possibly SImode EXP to Pmode register. */
10980 ix86_zero_extend_to_Pmode (exp)
10984 if (GET_MODE (exp) == VOIDmode)
10985 return force_reg (Pmode, exp);
10986 if (GET_MODE (exp) == Pmode)
10987 return copy_to_mode_reg (Pmode, exp);
10988 r = gen_reg_rtx (Pmode);
10989 emit_insn (gen_zero_extendsidi2 (r, exp));
10993 /* Expand string move (memcpy) operation. Use i386 string operations when
10994 profitable. expand_clrstr contains similar code. */
10996 ix86_expand_movstr (dst, src, count_exp, align_exp)
10997 rtx dst, src, count_exp, align_exp;
10999 rtx srcreg, destreg, countreg;
11000 enum machine_mode counter_mode;
11001 HOST_WIDE_INT align = 0;
11002 unsigned HOST_WIDE_INT count = 0;
11005 if (GET_CODE (align_exp) == CONST_INT)
11006 align = INTVAL (align_exp);
11008 /* Can't use any of this if the user has appropriated esi or edi. */
11009 if (global_regs[4] || global_regs[5])
11012 /* This simple hack avoids all inlining code and simplifies code below. */
11013 if (!TARGET_ALIGN_STRINGOPS)
11016 if (GET_CODE (count_exp) == CONST_INT)
11018 count = INTVAL (count_exp);
11019 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11023 /* Figure out proper mode for counter. For 32bits it is always SImode,
11024 for 64bits use SImode when possible, otherwise DImode.
11025 Set count to number of bytes copied when known at compile time. */
11026 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11027 || x86_64_zero_extended_value (count_exp))
11028 counter_mode = SImode;
11030 counter_mode = DImode;
11034 if (counter_mode != SImode && counter_mode != DImode)
11037 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11038 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11040 emit_insn (gen_cld ());
11042 /* When optimizing for size emit simple rep ; movsb instruction for
11043 counts not divisible by 4. */
11045 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11047 countreg = ix86_zero_extend_to_Pmode (count_exp);
11049 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
11050 destreg, srcreg, countreg));
11052 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
11053 destreg, srcreg, countreg));
11056 /* For constant aligned (or small unaligned) copies use rep movsl
11057 followed by code copying the rest. For PentiumPro ensure 8 byte
11058 alignment to allow rep movsl acceleration. */
11060 else if (count != 0
11062 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11063 || optimize_size || count < (unsigned int) 64))
11065 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11066 if (count & ~(size - 1))
11068 countreg = copy_to_mode_reg (counter_mode,
11069 GEN_INT ((count >> (size == 4 ? 2 : 3))
11070 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11071 countreg = ix86_zero_extend_to_Pmode (countreg);
11075 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
11076 destreg, srcreg, countreg));
11078 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
11079 destreg, srcreg, countreg));
11082 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
11083 destreg, srcreg, countreg));
11085 if (size == 8 && (count & 0x04))
11086 emit_insn (gen_strmovsi (destreg, srcreg));
11088 emit_insn (gen_strmovhi (destreg, srcreg));
11090 emit_insn (gen_strmovqi (destreg, srcreg));
11092 /* The generic code based on the glibc implementation:
11093 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
11094 allowing accelerated copying there)
11095 - copy the data using rep movsl
11096 - copy the rest. */
11101 int desired_alignment = (TARGET_PENTIUMPRO
11102 && (count == 0 || count >= (unsigned int) 260)
11103 ? 8 : UNITS_PER_WORD);
11105 /* In case we don't know anything about the alignment, default to
11106 library version, since it is usually equally fast and result in
11109 Also emit call when we know that the count is large and call overhead
11110 will not be important. */
11111 if (!TARGET_INLINE_ALL_STRINGOPS
11112 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11118 if (TARGET_SINGLE_STRINGOP)
11119 emit_insn (gen_cld ());
11121 countreg2 = gen_reg_rtx (Pmode);
11122 countreg = copy_to_mode_reg (counter_mode, count_exp);
11124 /* We don't use loops to align destination and to copy parts smaller
11125 than 4 bytes, because gcc is able to optimize such code better (in
11126 the case the destination or the count really is aligned, gcc is often
11127 able to predict the branches) and also it is friendlier to the
11128 hardware branch prediction.
11130 Using loops is beneficial for generic case, because we can
11131 handle small counts using the loops. Many CPUs (such as Athlon)
11132 have large REP prefix setup costs.
11134 This is quite costly. Maybe we can revisit this decision later or
11135 add some customizability to this code. */
11137 if (count == 0 && align < desired_alignment)
11139 label = gen_label_rtx ();
11140 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11141 LEU, 0, counter_mode, 1, label);
11145 rtx label = ix86_expand_aligntest (destreg, 1);
11146 emit_insn (gen_strmovqi (destreg, srcreg));
11147 ix86_adjust_counter (countreg, 1);
11148 emit_label (label);
11149 LABEL_NUSES (label) = 1;
11153 rtx label = ix86_expand_aligntest (destreg, 2);
11154 emit_insn (gen_strmovhi (destreg, srcreg));
11155 ix86_adjust_counter (countreg, 2);
11156 emit_label (label);
11157 LABEL_NUSES (label) = 1;
11159 if (align <= 4 && desired_alignment > 4)
11161 rtx label = ix86_expand_aligntest (destreg, 4);
11162 emit_insn (gen_strmovsi (destreg, srcreg));
11163 ix86_adjust_counter (countreg, 4);
11164 emit_label (label);
11165 LABEL_NUSES (label) = 1;
11168 if (label && desired_alignment > 4 && !TARGET_64BIT)
11170 emit_label (label);
11171 LABEL_NUSES (label) = 1;
11174 if (!TARGET_SINGLE_STRINGOP)
11175 emit_insn (gen_cld ());
11178 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11180 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
11181 destreg, srcreg, countreg2));
11185 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
11186 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
11187 destreg, srcreg, countreg2));
11192 emit_label (label);
11193 LABEL_NUSES (label) = 1;
11195 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11196 emit_insn (gen_strmovsi (destreg, srcreg));
11197 if ((align <= 4 || count == 0) && TARGET_64BIT)
11199 rtx label = ix86_expand_aligntest (countreg, 4);
11200 emit_insn (gen_strmovsi (destreg, srcreg));
11201 emit_label (label);
11202 LABEL_NUSES (label) = 1;
11204 if (align > 2 && count != 0 && (count & 2))
11205 emit_insn (gen_strmovhi (destreg, srcreg));
11206 if (align <= 2 || count == 0)
11208 rtx label = ix86_expand_aligntest (countreg, 2);
11209 emit_insn (gen_strmovhi (destreg, srcreg));
11210 emit_label (label);
11211 LABEL_NUSES (label) = 1;
11213 if (align > 1 && count != 0 && (count & 1))
11214 emit_insn (gen_strmovqi (destreg, srcreg));
11215 if (align <= 1 || count == 0)
11217 rtx label = ix86_expand_aligntest (countreg, 1);
11218 emit_insn (gen_strmovqi (destreg, srcreg));
11219 emit_label (label);
11220 LABEL_NUSES (label) = 1;
11224 insns = get_insns ();
11227 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
11232 /* Expand string clear operation (bzero). Use i386 string operations when
11233 profitable. expand_movstr contains similar code. */
11235 ix86_expand_clrstr (src, count_exp, align_exp)
11236 rtx src, count_exp, align_exp;
11238 rtx destreg, zeroreg, countreg;
11239 enum machine_mode counter_mode;
11240 HOST_WIDE_INT align = 0;
11241 unsigned HOST_WIDE_INT count = 0;
11243 if (GET_CODE (align_exp) == CONST_INT)
11244 align = INTVAL (align_exp);
11246 /* Can't use any of this if the user has appropriated esi. */
11247 if (global_regs[4])
11250 /* This simple hack avoids all inlining code and simplifies code below. */
11251 if (!TARGET_ALIGN_STRINGOPS)
11254 if (GET_CODE (count_exp) == CONST_INT)
11256 count = INTVAL (count_exp);
11257 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11260 /* Figure out proper mode for counter. For 32bits it is always SImode,
11261 for 64bits use SImode when possible, otherwise DImode.
11262 Set count to number of bytes copied when known at compile time. */
11263 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11264 || x86_64_zero_extended_value (count_exp))
11265 counter_mode = SImode;
11267 counter_mode = DImode;
11269 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11271 emit_insn (gen_cld ());
11273 /* When optimizing for size emit simple rep ; movsb instruction for
11274 counts not divisible by 4. */
11276 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11278 countreg = ix86_zero_extend_to_Pmode (count_exp);
11279 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11281 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
11282 destreg, countreg));
11284 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
11285 destreg, countreg));
11287 else if (count != 0
11289 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11290 || optimize_size || count < (unsigned int) 64))
11292 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11293 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11294 if (count & ~(size - 1))
11296 countreg = copy_to_mode_reg (counter_mode,
11297 GEN_INT ((count >> (size == 4 ? 2 : 3))
11298 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11299 countreg = ix86_zero_extend_to_Pmode (countreg);
11303 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
11304 destreg, countreg));
11306 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
11307 destreg, countreg));
11310 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
11311 destreg, countreg));
11313 if (size == 8 && (count & 0x04))
11314 emit_insn (gen_strsetsi (destreg,
11315 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11317 emit_insn (gen_strsethi (destreg,
11318 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11320 emit_insn (gen_strsetqi (destreg,
11321 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11327 /* Compute desired alignment of the string operation. */
11328 int desired_alignment = (TARGET_PENTIUMPRO
11329 && (count == 0 || count >= (unsigned int) 260)
11330 ? 8 : UNITS_PER_WORD);
11332 /* In case we don't know anything about the alignment, default to
11333 library version, since it is usually equally fast and result in
11336 Also emit call when we know that the count is large and call overhead
11337 will not be important. */
11338 if (!TARGET_INLINE_ALL_STRINGOPS
11339 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11342 if (TARGET_SINGLE_STRINGOP)
11343 emit_insn (gen_cld ());
11345 countreg2 = gen_reg_rtx (Pmode);
11346 countreg = copy_to_mode_reg (counter_mode, count_exp);
11347 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11349 if (count == 0 && align < desired_alignment)
11351 label = gen_label_rtx ();
11352 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11353 LEU, 0, counter_mode, 1, label);
11357 rtx label = ix86_expand_aligntest (destreg, 1);
11358 emit_insn (gen_strsetqi (destreg,
11359 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11360 ix86_adjust_counter (countreg, 1);
11361 emit_label (label);
11362 LABEL_NUSES (label) = 1;
11366 rtx label = ix86_expand_aligntest (destreg, 2);
11367 emit_insn (gen_strsethi (destreg,
11368 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11369 ix86_adjust_counter (countreg, 2);
11370 emit_label (label);
11371 LABEL_NUSES (label) = 1;
11373 if (align <= 4 && desired_alignment > 4)
11375 rtx label = ix86_expand_aligntest (destreg, 4);
11376 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
11377 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11379 ix86_adjust_counter (countreg, 4);
11380 emit_label (label);
11381 LABEL_NUSES (label) = 1;
11384 if (label && desired_alignment > 4 && !TARGET_64BIT)
11386 emit_label (label);
11387 LABEL_NUSES (label) = 1;
11391 if (!TARGET_SINGLE_STRINGOP)
11392 emit_insn (gen_cld ());
11395 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11397 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
11398 destreg, countreg2));
11402 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
11403 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
11404 destreg, countreg2));
11408 emit_label (label);
11409 LABEL_NUSES (label) = 1;
11412 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11413 emit_insn (gen_strsetsi (destreg,
11414 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11415 if (TARGET_64BIT && (align <= 4 || count == 0))
11417 rtx label = ix86_expand_aligntest (countreg, 4);
11418 emit_insn (gen_strsetsi (destreg,
11419 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11420 emit_label (label);
11421 LABEL_NUSES (label) = 1;
11423 if (align > 2 && count != 0 && (count & 2))
11424 emit_insn (gen_strsethi (destreg,
11425 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11426 if (align <= 2 || count == 0)
11428 rtx label = ix86_expand_aligntest (countreg, 2);
11429 emit_insn (gen_strsethi (destreg,
11430 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11431 emit_label (label);
11432 LABEL_NUSES (label) = 1;
11434 if (align > 1 && count != 0 && (count & 1))
11435 emit_insn (gen_strsetqi (destreg,
11436 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11437 if (align <= 1 || count == 0)
11439 rtx label = ix86_expand_aligntest (countreg, 1);
11440 emit_insn (gen_strsetqi (destreg,
11441 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11442 emit_label (label);
11443 LABEL_NUSES (label) = 1;
11448 /* Expand strlen. */
11450 ix86_expand_strlen (out, src, eoschar, align)
11451 rtx out, src, eoschar, align;
11453 rtx addr, scratch1, scratch2, scratch3, scratch4;
11455 /* The generic case of strlen expander is long. Avoid it's
11456 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
11458 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11459 && !TARGET_INLINE_ALL_STRINGOPS
11461 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11464 addr = force_reg (Pmode, XEXP (src, 0));
11465 scratch1 = gen_reg_rtx (Pmode);
11467 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11470 /* Well it seems that some optimizer does not combine a call like
11471 foo(strlen(bar), strlen(bar));
11472 when the move and the subtraction is done here. It does calculate
11473 the length just once when these instructions are done inside of
11474 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
11475 often used and I use one fewer register for the lifetime of
11476 output_strlen_unroll() this is better. */
11478 emit_move_insn (out, addr);
11480 ix86_expand_strlensi_unroll_1 (out, align);
11482 /* strlensi_unroll_1 returns the address of the zero at the end of
11483 the string, like memchr(), so compute the length by subtracting
11484 the start address. */
11486 emit_insn (gen_subdi3 (out, out, addr));
11488 emit_insn (gen_subsi3 (out, out, addr));
11492 scratch2 = gen_reg_rtx (Pmode);
11493 scratch3 = gen_reg_rtx (Pmode);
11494 scratch4 = force_reg (Pmode, constm1_rtx);
11496 emit_move_insn (scratch3, addr);
11497 eoschar = force_reg (QImode, eoschar);
11499 emit_insn (gen_cld ());
11502 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
11503 align, scratch4, scratch3));
11504 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11505 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11509 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
11510 align, scratch4, scratch3));
11511 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11512 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11518 /* Expand the appropriate insns for doing strlen if not just doing
11521 out = result, initialized with the start address
11522 align_rtx = alignment of the address.
11523 scratch = scratch register, initialized with the startaddress when
11524 not aligned, otherwise undefined
11526 This is just the body. It needs the initialisations mentioned above and
11527 some address computing at the end. These things are done in i386.md. */
11530 ix86_expand_strlensi_unroll_1 (out, align_rtx)
11531 rtx out, align_rtx;
11535 rtx align_2_label = NULL_RTX;
11536 rtx align_3_label = NULL_RTX;
11537 rtx align_4_label = gen_label_rtx ();
11538 rtx end_0_label = gen_label_rtx ();
11540 rtx tmpreg = gen_reg_rtx (SImode);
11541 rtx scratch = gen_reg_rtx (SImode);
11545 if (GET_CODE (align_rtx) == CONST_INT)
11546 align = INTVAL (align_rtx);
11548 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11550 /* Is there a known alignment and is it less than 4? */
11553 rtx scratch1 = gen_reg_rtx (Pmode);
11554 emit_move_insn (scratch1, out);
11555 /* Is there a known alignment and is it not 2? */
11558 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11559 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11561 /* Leave just the 3 lower bits. */
11562 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11563 NULL_RTX, 0, OPTAB_WIDEN);
11565 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11566 Pmode, 1, align_4_label);
11567 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
11568 Pmode, 1, align_2_label);
11569 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
11570 Pmode, 1, align_3_label);
11574 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11575 check if is aligned to 4 - byte. */
11577 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
11578 NULL_RTX, 0, OPTAB_WIDEN);
11580 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11581 Pmode, 1, align_4_label);
11584 mem = gen_rtx_MEM (QImode, out);
11586 /* Now compare the bytes. */
11588 /* Compare the first n unaligned byte on a byte per byte basis. */
11589 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11590 QImode, 1, end_0_label);
11592 /* Increment the address. */
11594 emit_insn (gen_adddi3 (out, out, const1_rtx));
11596 emit_insn (gen_addsi3 (out, out, const1_rtx));
11598 /* Not needed with an alignment of 2 */
11601 emit_label (align_2_label);
11603 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11607 emit_insn (gen_adddi3 (out, out, const1_rtx));
11609 emit_insn (gen_addsi3 (out, out, const1_rtx));
11611 emit_label (align_3_label);
11614 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11618 emit_insn (gen_adddi3 (out, out, const1_rtx));
11620 emit_insn (gen_addsi3 (out, out, const1_rtx));
11623 /* Generate loop to check 4 bytes at a time. It is not a good idea to
11624 align this loop. It gives only huge programs, but does not help to
11626 emit_label (align_4_label);
11628 mem = gen_rtx_MEM (SImode, out);
11629 emit_move_insn (scratch, mem);
11631 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11633 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11635 /* This formula yields a nonzero result iff one of the bytes is zero.
11636 This saves three branches inside loop and many cycles. */
11638 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11639 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11640 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11641 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11642 gen_int_mode (0x80808080, SImode)));
11643 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11648 rtx reg = gen_reg_rtx (SImode);
11649 rtx reg2 = gen_reg_rtx (Pmode);
11650 emit_move_insn (reg, tmpreg);
11651 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11653 /* If zero is not in the first two bytes, move two bytes forward. */
11654 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11655 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11656 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11657 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11658 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11661 /* Emit lea manually to avoid clobbering of flags. */
11662 emit_insn (gen_rtx_SET (SImode, reg2,
11663 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
11665 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11666 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11667 emit_insn (gen_rtx_SET (VOIDmode, out,
11668 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11675 rtx end_2_label = gen_label_rtx ();
11676 /* Is zero in the first two bytes? */
11678 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11679 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11680 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11681 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11682 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11684 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11685 JUMP_LABEL (tmp) = end_2_label;
11687 /* Not in the first two. Move two bytes forward. */
11688 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11690 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
11692 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
11694 emit_label (end_2_label);
11698 /* Avoid branch in fixing the byte. */
11699 tmpreg = gen_lowpart (QImode, tmpreg);
11700 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11701 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11703 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11705 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
11707 emit_label (end_0_label);
11711 ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop, sibcall)
11712 rtx retval, fnaddr, callarg1, callarg2, pop;
11715 rtx use = NULL, call;
11717 if (pop == const0_rtx)
11719 if (TARGET_64BIT && pop)
11723 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11724 fnaddr = machopic_indirect_call_target (fnaddr);
11726 /* Static functions and indirect calls don't need the pic register. */
11727 if (! TARGET_64BIT && flag_pic
11728 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11729 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
11730 use_reg (&use, pic_offset_table_rtx);
11732 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11734 rtx al = gen_rtx_REG (QImode, 0);
11735 emit_move_insn (al, callarg2);
11736 use_reg (&use, al);
11738 #endif /* TARGET_MACHO */
11740 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11742 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11743 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11745 if (sibcall && TARGET_64BIT
11746 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11749 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11750 fnaddr = gen_rtx_REG (Pmode, 40);
11751 emit_move_insn (fnaddr, addr);
11752 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11755 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11757 call = gen_rtx_SET (VOIDmode, retval, call);
11760 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11761 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11762 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11765 call = emit_call_insn (call);
11767 CALL_INSN_FUNCTION_USAGE (call) = use;
11771 /* Clear stack slot assignments remembered from previous functions.
11772 This is called from INIT_EXPANDERS once before RTL is emitted for each
11775 static struct machine_function *
11776 ix86_init_machine_status ()
11778 struct machine_function *f;
11780 f = ggc_alloc_cleared (sizeof (struct machine_function));
11781 f->use_fast_prologue_epilogue_nregs = -1;
11786 /* Return a MEM corresponding to a stack slot with mode MODE.
11787 Allocate a new slot if necessary.
11789 The RTL for a function can have several slots available: N is
11790 which slot to use. */
11793 assign_386_stack_local (mode, n)
11794 enum machine_mode mode;
11797 struct stack_local_entry *s;
11799 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11802 for (s = ix86_stack_locals; s; s = s->next)
11803 if (s->mode == mode && s->n == n)
11806 s = (struct stack_local_entry *)
11807 ggc_alloc (sizeof (struct stack_local_entry));
11810 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11812 s->next = ix86_stack_locals;
11813 ix86_stack_locals = s;
11817 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11819 static GTY(()) rtx ix86_tls_symbol;
11821 ix86_tls_get_addr ()
11824 if (!ix86_tls_symbol)
11826 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11827 (TARGET_GNU_TLS && !TARGET_64BIT)
11828 ? "___tls_get_addr"
11829 : "__tls_get_addr");
11832 return ix86_tls_symbol;
11835 /* Calculate the length of the memory address in the instruction
11836 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11839 memory_address_length (addr)
11842 struct ix86_address parts;
11843 rtx base, index, disp;
11846 if (GET_CODE (addr) == PRE_DEC
11847 || GET_CODE (addr) == POST_INC
11848 || GET_CODE (addr) == PRE_MODIFY
11849 || GET_CODE (addr) == POST_MODIFY)
11852 if (! ix86_decompose_address (addr, &parts))
11856 index = parts.index;
11860 /* Register Indirect. */
11861 if (base && !index && !disp)
11863 /* Special cases: ebp and esp need the two-byte modrm form. */
11864 if (addr == stack_pointer_rtx
11865 || addr == arg_pointer_rtx
11866 || addr == frame_pointer_rtx
11867 || addr == hard_frame_pointer_rtx)
11871 /* Direct Addressing. */
11872 else if (disp && !base && !index)
11877 /* Find the length of the displacement constant. */
11880 if (GET_CODE (disp) == CONST_INT
11881 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
11888 /* An index requires the two-byte modrm form. */
11896 /* Compute default value for "length_immediate" attribute. When SHORTFORM
11897 is set, expect that insn have 8bit immediate alternative. */
11899 ix86_attr_length_immediate_default (insn, shortform)
11905 extract_insn_cached (insn);
11906 for (i = recog_data.n_operands - 1; i >= 0; --i)
11907 if (CONSTANT_P (recog_data.operand[i]))
11912 && GET_CODE (recog_data.operand[i]) == CONST_INT
11913 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11917 switch (get_attr_mode (insn))
11928 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
11933 fatal_insn ("unknown insn mode", insn);
11939 /* Compute default value for "length_address" attribute. */
11941 ix86_attr_length_address_default (insn)
11946 if (get_attr_type (insn) == TYPE_LEA)
11948 rtx set = PATTERN (insn);
11949 if (GET_CODE (set) == SET)
11951 else if (GET_CODE (set) == PARALLEL
11952 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
11953 set = XVECEXP (set, 0, 0);
11956 #ifdef ENABLE_CHECKING
11962 return memory_address_length (SET_SRC (set));
11965 extract_insn_cached (insn);
11966 for (i = recog_data.n_operands - 1; i >= 0; --i)
11967 if (GET_CODE (recog_data.operand[i]) == MEM)
11969 return memory_address_length (XEXP (recog_data.operand[i], 0));
11975 /* Return the maximum number of instructions a cpu can issue. */
11982 case PROCESSOR_PENTIUM:
11986 case PROCESSOR_PENTIUMPRO:
11987 case PROCESSOR_PENTIUM4:
11988 case PROCESSOR_ATHLON:
11997 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
11998 by DEP_INSN and nothing set by DEP_INSN. */
12001 ix86_flags_dependant (insn, dep_insn, insn_type)
12002 rtx insn, dep_insn;
12003 enum attr_type insn_type;
12007 /* Simplify the test for uninteresting insns. */
12008 if (insn_type != TYPE_SETCC
12009 && insn_type != TYPE_ICMOV
12010 && insn_type != TYPE_FCMOV
12011 && insn_type != TYPE_IBR)
12014 if ((set = single_set (dep_insn)) != 0)
12016 set = SET_DEST (set);
12019 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
12020 && XVECLEN (PATTERN (dep_insn), 0) == 2
12021 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
12022 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
12024 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12025 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12030 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
12033 /* This test is true if the dependent insn reads the flags but
12034 not any other potentially set register. */
12035 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
12038 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
12044 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
12045 address with operands set by DEP_INSN. */
12048 ix86_agi_dependant (insn, dep_insn, insn_type)
12049 rtx insn, dep_insn;
12050 enum attr_type insn_type;
12054 if (insn_type == TYPE_LEA
12057 addr = PATTERN (insn);
12058 if (GET_CODE (addr) == SET)
12060 else if (GET_CODE (addr) == PARALLEL
12061 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
12062 addr = XVECEXP (addr, 0, 0);
12065 addr = SET_SRC (addr);
12070 extract_insn_cached (insn);
12071 for (i = recog_data.n_operands - 1; i >= 0; --i)
12072 if (GET_CODE (recog_data.operand[i]) == MEM)
12074 addr = XEXP (recog_data.operand[i], 0);
12081 return modified_in_p (addr, dep_insn);
12085 ix86_adjust_cost (insn, link, dep_insn, cost)
12086 rtx insn, link, dep_insn;
12089 enum attr_type insn_type, dep_insn_type;
12090 enum attr_memory memory, dep_memory;
12092 int dep_insn_code_number;
12094 /* Anti and output dependencies have zero cost on all CPUs. */
12095 if (REG_NOTE_KIND (link) != 0)
12098 dep_insn_code_number = recog_memoized (dep_insn);
12100 /* If we can't recognize the insns, we can't really do anything. */
12101 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
12104 insn_type = get_attr_type (insn);
12105 dep_insn_type = get_attr_type (dep_insn);
12109 case PROCESSOR_PENTIUM:
12110 /* Address Generation Interlock adds a cycle of latency. */
12111 if (ix86_agi_dependant (insn, dep_insn, insn_type))
12114 /* ??? Compares pair with jump/setcc. */
12115 if (ix86_flags_dependant (insn, dep_insn, insn_type))
12118 /* Floating point stores require value to be ready one cycle earlier. */
12119 if (insn_type == TYPE_FMOV
12120 && get_attr_memory (insn) == MEMORY_STORE
12121 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12125 case PROCESSOR_PENTIUMPRO:
12126 memory = get_attr_memory (insn);
12127 dep_memory = get_attr_memory (dep_insn);
12129 /* Since we can't represent delayed latencies of load+operation,
12130 increase the cost here for non-imov insns. */
12131 if (dep_insn_type != TYPE_IMOV
12132 && dep_insn_type != TYPE_FMOV
12133 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
12136 /* INT->FP conversion is expensive. */
12137 if (get_attr_fp_int_src (dep_insn))
12140 /* There is one cycle extra latency between an FP op and a store. */
12141 if (insn_type == TYPE_FMOV
12142 && (set = single_set (dep_insn)) != NULL_RTX
12143 && (set2 = single_set (insn)) != NULL_RTX
12144 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
12145 && GET_CODE (SET_DEST (set2)) == MEM)
12148 /* Show ability of reorder buffer to hide latency of load by executing
12149 in parallel with previous instruction in case
12150 previous instruction is not needed to compute the address. */
12151 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12152 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12154 /* Claim moves to take one cycle, as core can issue one load
12155 at time and the next load can start cycle later. */
12156 if (dep_insn_type == TYPE_IMOV
12157 || dep_insn_type == TYPE_FMOV)
12165 memory = get_attr_memory (insn);
12166 dep_memory = get_attr_memory (dep_insn);
12167 /* The esp dependency is resolved before the instruction is really
12169 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
12170 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
12173 /* Since we can't represent delayed latencies of load+operation,
12174 increase the cost here for non-imov insns. */
12175 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
12176 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
12178 /* INT->FP conversion is expensive. */
12179 if (get_attr_fp_int_src (dep_insn))
12182 /* Show ability of reorder buffer to hide latency of load by executing
12183 in parallel with previous instruction in case
12184 previous instruction is not needed to compute the address. */
12185 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12186 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12188 /* Claim moves to take one cycle, as core can issue one load
12189 at time and the next load can start cycle later. */
12190 if (dep_insn_type == TYPE_IMOV
12191 || dep_insn_type == TYPE_FMOV)
12200 case PROCESSOR_ATHLON:
12202 memory = get_attr_memory (insn);
12203 dep_memory = get_attr_memory (dep_insn);
12205 /* Show ability of reorder buffer to hide latency of load by executing
12206 in parallel with previous instruction in case
12207 previous instruction is not needed to compute the address. */
12208 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12209 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12211 enum attr_unit unit = get_attr_unit (insn);
12214 /* Because of the difference between the length of integer and
12215 floating unit pipeline preparation stages, the memory operands
12216 for floating point are cheaper.
12218 ??? For Athlon it the difference is most propbably 2. */
12219 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
12222 loadcost = TARGET_ATHLON ? 2 : 0;
12224 if (cost >= loadcost)
12239 struct ppro_sched_data
12242 int issued_this_cycle;
12246 static enum attr_ppro_uops
12247 ix86_safe_ppro_uops (insn)
12250 if (recog_memoized (insn) >= 0)
12251 return get_attr_ppro_uops (insn);
12253 return PPRO_UOPS_MANY;
12257 ix86_dump_ppro_packet (dump)
12260 if (ix86_sched_data.ppro.decode[0])
12262 fprintf (dump, "PPRO packet: %d",
12263 INSN_UID (ix86_sched_data.ppro.decode[0]));
12264 if (ix86_sched_data.ppro.decode[1])
12265 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
12266 if (ix86_sched_data.ppro.decode[2])
12267 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
12268 fputc ('\n', dump);
12272 /* We're beginning a new block. Initialize data structures as necessary. */
12275 ix86_sched_init (dump, sched_verbose, veclen)
12276 FILE *dump ATTRIBUTE_UNUSED;
12277 int sched_verbose ATTRIBUTE_UNUSED;
12278 int veclen ATTRIBUTE_UNUSED;
12280 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
12283 /* Shift INSN to SLOT, and shift everything else down. */
12286 ix86_reorder_insn (insnp, slot)
12293 insnp[0] = insnp[1];
12294 while (++insnp != slot);
12300 ix86_sched_reorder_ppro (ready, e_ready)
12305 enum attr_ppro_uops cur_uops;
12306 int issued_this_cycle;
12310 /* At this point .ppro.decode contains the state of the three
12311 decoders from last "cycle". That is, those insns that were
12312 actually independent. But here we're scheduling for the
12313 decoder, and we may find things that are decodable in the
12316 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
12317 issued_this_cycle = 0;
12320 cur_uops = ix86_safe_ppro_uops (*insnp);
12322 /* If the decoders are empty, and we've a complex insn at the
12323 head of the priority queue, let it issue without complaint. */
12324 if (decode[0] == NULL)
12326 if (cur_uops == PPRO_UOPS_MANY)
12328 decode[0] = *insnp;
12332 /* Otherwise, search for a 2-4 uop unsn to issue. */
12333 while (cur_uops != PPRO_UOPS_FEW)
12335 if (insnp == ready)
12337 cur_uops = ix86_safe_ppro_uops (*--insnp);
12340 /* If so, move it to the head of the line. */
12341 if (cur_uops == PPRO_UOPS_FEW)
12342 ix86_reorder_insn (insnp, e_ready);
12344 /* Issue the head of the queue. */
12345 issued_this_cycle = 1;
12346 decode[0] = *e_ready--;
12349 /* Look for simple insns to fill in the other two slots. */
12350 for (i = 1; i < 3; ++i)
12351 if (decode[i] == NULL)
12353 if (ready > e_ready)
12357 cur_uops = ix86_safe_ppro_uops (*insnp);
12358 while (cur_uops != PPRO_UOPS_ONE)
12360 if (insnp == ready)
12362 cur_uops = ix86_safe_ppro_uops (*--insnp);
12365 /* Found one. Move it to the head of the queue and issue it. */
12366 if (cur_uops == PPRO_UOPS_ONE)
12368 ix86_reorder_insn (insnp, e_ready);
12369 decode[i] = *e_ready--;
12370 issued_this_cycle++;
12374 /* ??? Didn't find one. Ideally, here we would do a lazy split
12375 of 2-uop insns, issue one and queue the other. */
12379 if (issued_this_cycle == 0)
12380 issued_this_cycle = 1;
12381 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
12384 /* We are about to being issuing insns for this clock cycle.
12385 Override the default sort algorithm to better slot instructions. */
12387 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
12388 FILE *dump ATTRIBUTE_UNUSED;
12389 int sched_verbose ATTRIBUTE_UNUSED;
12392 int clock_var ATTRIBUTE_UNUSED;
12394 int n_ready = *n_readyp;
12395 rtx *e_ready = ready + n_ready - 1;
12397 /* Make sure to go ahead and initialize key items in
12398 ix86_sched_data if we are not going to bother trying to
12399 reorder the ready queue. */
12402 ix86_sched_data.ppro.issued_this_cycle = 1;
12411 case PROCESSOR_PENTIUMPRO:
12412 ix86_sched_reorder_ppro (ready, e_ready);
12417 return ix86_issue_rate ();
12420 /* We are about to issue INSN. Return the number of insns left on the
12421 ready queue that can be issued this cycle. */
12424 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
12428 int can_issue_more;
12434 return can_issue_more - 1;
12436 case PROCESSOR_PENTIUMPRO:
12438 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
12440 if (uops == PPRO_UOPS_MANY)
12443 ix86_dump_ppro_packet (dump);
12444 ix86_sched_data.ppro.decode[0] = insn;
12445 ix86_sched_data.ppro.decode[1] = NULL;
12446 ix86_sched_data.ppro.decode[2] = NULL;
12448 ix86_dump_ppro_packet (dump);
12449 ix86_sched_data.ppro.decode[0] = NULL;
12451 else if (uops == PPRO_UOPS_FEW)
12454 ix86_dump_ppro_packet (dump);
12455 ix86_sched_data.ppro.decode[0] = insn;
12456 ix86_sched_data.ppro.decode[1] = NULL;
12457 ix86_sched_data.ppro.decode[2] = NULL;
12461 for (i = 0; i < 3; ++i)
12462 if (ix86_sched_data.ppro.decode[i] == NULL)
12464 ix86_sched_data.ppro.decode[i] = insn;
12472 ix86_dump_ppro_packet (dump);
12473 ix86_sched_data.ppro.decode[0] = NULL;
12474 ix86_sched_data.ppro.decode[1] = NULL;
12475 ix86_sched_data.ppro.decode[2] = NULL;
12479 return --ix86_sched_data.ppro.issued_this_cycle;
12484 ia32_use_dfa_pipeline_interface ()
12486 if (TARGET_PENTIUM || TARGET_ATHLON_K8)
12491 /* How many alternative schedules to try. This should be as wide as the
12492 scheduling freedom in the DFA, but no wider. Making this value too
12493 large results extra work for the scheduler. */
12496 ia32_multipass_dfa_lookahead ()
12498 if (ix86_tune == PROCESSOR_PENTIUM)
12505 /* Walk through INSNS and look for MEM references whose address is DSTREG or
12506 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
12510 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
12512 rtx dstref, srcref, dstreg, srcreg;
12516 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
12518 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
12522 /* Subroutine of above to actually do the updating by recursively walking
12526 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
12528 rtx dstref, srcref, dstreg, srcreg;
12530 enum rtx_code code = GET_CODE (x);
12531 const char *format_ptr = GET_RTX_FORMAT (code);
12534 if (code == MEM && XEXP (x, 0) == dstreg)
12535 MEM_COPY_ATTRIBUTES (x, dstref);
12536 else if (code == MEM && XEXP (x, 0) == srcreg)
12537 MEM_COPY_ATTRIBUTES (x, srcref);
12539 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
12541 if (*format_ptr == 'e')
12542 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
12544 else if (*format_ptr == 'E')
12545 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12546 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
12551 /* Compute the alignment given to a constant that is being placed in memory.
12552 EXP is the constant and ALIGN is the alignment that the object would
12554 The value of this function is used instead of that alignment to align
12558 ix86_constant_alignment (exp, align)
12562 if (TREE_CODE (exp) == REAL_CST)
12564 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12566 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12569 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
12576 /* Compute the alignment for a static variable.
12577 TYPE is the data type, and ALIGN is the alignment that
12578 the object would ordinarily have. The value of this function is used
12579 instead of that alignment to align the object. */
12582 ix86_data_alignment (type, align)
12586 if (AGGREGATE_TYPE_P (type)
12587 && TYPE_SIZE (type)
12588 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12589 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12590 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12593 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12594 to 16byte boundary. */
12597 if (AGGREGATE_TYPE_P (type)
12598 && TYPE_SIZE (type)
12599 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12600 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12601 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12605 if (TREE_CODE (type) == ARRAY_TYPE)
12607 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12609 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12612 else if (TREE_CODE (type) == COMPLEX_TYPE)
12615 if (TYPE_MODE (type) == DCmode && align < 64)
12617 if (TYPE_MODE (type) == XCmode && align < 128)
12620 else if ((TREE_CODE (type) == RECORD_TYPE
12621 || TREE_CODE (type) == UNION_TYPE
12622 || TREE_CODE (type) == QUAL_UNION_TYPE)
12623 && TYPE_FIELDS (type))
12625 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12627 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12630 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12631 || TREE_CODE (type) == INTEGER_TYPE)
12633 if (TYPE_MODE (type) == DFmode && align < 64)
12635 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12642 /* Compute the alignment for a local variable.
12643 TYPE is the data type, and ALIGN is the alignment that
12644 the object would ordinarily have. The value of this macro is used
12645 instead of that alignment to align the object. */
12648 ix86_local_alignment (type, align)
12652 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12653 to 16byte boundary. */
12656 if (AGGREGATE_TYPE_P (type)
12657 && TYPE_SIZE (type)
12658 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12659 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12660 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12663 if (TREE_CODE (type) == ARRAY_TYPE)
12665 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12667 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12670 else if (TREE_CODE (type) == COMPLEX_TYPE)
12672 if (TYPE_MODE (type) == DCmode && align < 64)
12674 if (TYPE_MODE (type) == XCmode && align < 128)
12677 else if ((TREE_CODE (type) == RECORD_TYPE
12678 || TREE_CODE (type) == UNION_TYPE
12679 || TREE_CODE (type) == QUAL_UNION_TYPE)
12680 && TYPE_FIELDS (type))
12682 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12684 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12687 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12688 || TREE_CODE (type) == INTEGER_TYPE)
12691 if (TYPE_MODE (type) == DFmode && align < 64)
12693 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12699 /* Emit RTL insns to initialize the variable parts of a trampoline.
12700 FNADDR is an RTX for the address of the function's pure code.
12701 CXT is an RTX for the static chain value for the function. */
12703 x86_initialize_trampoline (tramp, fnaddr, cxt)
12704 rtx tramp, fnaddr, cxt;
12708 /* Compute offset from the end of the jmp to the target function. */
12709 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12710 plus_constant (tramp, 10),
12711 NULL_RTX, 1, OPTAB_DIRECT);
12712 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12713 gen_int_mode (0xb9, QImode));
12714 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12715 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12716 gen_int_mode (0xe9, QImode));
12717 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12722 /* Try to load address using shorter movl instead of movabs.
12723 We may want to support movq for kernel mode, but kernel does not use
12724 trampolines at the moment. */
12725 if (x86_64_zero_extended_value (fnaddr))
12727 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12728 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12729 gen_int_mode (0xbb41, HImode));
12730 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12731 gen_lowpart (SImode, fnaddr));
12736 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12737 gen_int_mode (0xbb49, HImode));
12738 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12742 /* Load static chain using movabs to r10. */
12743 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12744 gen_int_mode (0xba49, HImode));
12745 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12748 /* Jump to the r11 */
12749 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12750 gen_int_mode (0xff49, HImode));
12751 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12752 gen_int_mode (0xe3, QImode));
12754 if (offset > TRAMPOLINE_SIZE)
12758 #ifdef TRANSFER_FROM_TRAMPOLINE
12759 emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
12760 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12764 #define def_builtin(MASK, NAME, TYPE, CODE) \
12766 if ((MASK) & target_flags \
12767 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
12768 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12769 NULL, NULL_TREE); \
12772 struct builtin_description
12774 const unsigned int mask;
12775 const enum insn_code icode;
12776 const char *const name;
12777 const enum ix86_builtins code;
12778 const enum rtx_code comparison;
12779 const unsigned int flag;
12782 static const struct builtin_description bdesc_comi[] =
12784 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12785 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12786 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12787 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12788 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12789 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12790 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12791 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12792 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12793 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12794 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12795 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12796 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12797 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12798 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12799 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12800 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12801 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12802 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12803 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12804 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12805 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12806 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12807 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12810 static const struct builtin_description bdesc_2arg[] =
12813 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12814 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12815 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12816 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12817 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12818 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12819 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12820 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12822 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12823 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12824 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12825 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12826 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12827 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12828 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12829 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12830 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12831 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12832 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12833 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12834 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12835 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12836 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12837 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12838 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12839 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12840 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12841 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12843 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12844 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12845 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12846 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12848 { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12849 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12850 { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12851 { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12853 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12854 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12855 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12856 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12857 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12860 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12861 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12862 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12863 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
12864 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12865 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12866 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12867 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
12869 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12870 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12871 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12872 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12873 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12874 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12875 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12876 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12878 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12879 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12880 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12882 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12883 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12884 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12885 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12887 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12888 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12890 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12891 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12892 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12893 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12894 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12895 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12897 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12898 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12899 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12900 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12902 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12903 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12904 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12905 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12906 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12907 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12910 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12911 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12912 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12914 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12915 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12916 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
12918 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12919 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12920 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12921 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12922 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12923 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12925 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12926 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12927 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12928 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12929 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12930 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12932 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12933 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12934 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12935 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12937 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12938 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12941 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12942 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12943 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12944 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12945 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12946 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12947 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12948 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12950 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12951 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12952 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12953 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12954 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12955 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12956 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12957 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12958 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12959 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12960 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12961 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12962 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12963 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12964 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12965 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12966 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12967 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12968 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12969 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12971 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12972 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12973 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12974 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12976 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12977 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12978 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12979 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12981 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12982 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12983 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12986 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12987 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12988 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12989 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12990 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12991 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12992 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12993 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12995 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12996 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12997 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12998 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12999 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
13000 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
13001 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
13002 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
13004 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
13005 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
13006 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
13007 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
13009 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
13010 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
13011 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
13012 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
13014 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
13015 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
13017 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
13018 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
13019 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
13020 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
13021 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
13022 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
13024 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
13025 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
13026 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
13027 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
13029 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
13030 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
13031 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
13032 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
13033 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
13034 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
13035 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
13036 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
13038 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
13039 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
13040 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
13042 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
13043 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
13045 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
13046 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
13047 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
13048 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
13049 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
13050 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
13052 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
13053 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
13054 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
13055 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
13056 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
13057 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
13059 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
13060 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
13061 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
13062 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
13064 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
13066 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
13067 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
13068 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
13069 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
13072 { MASK_PNI, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
13073 { MASK_PNI, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
13074 { MASK_PNI, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
13075 { MASK_PNI, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
13076 { MASK_PNI, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
13077 { MASK_PNI, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
13080 static const struct builtin_description bdesc_1arg[] =
13082 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
13083 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
13085 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
13086 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
13087 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
13089 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
13090 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
13091 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
13092 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
13093 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
13094 { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
13096 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
13097 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
13098 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
13099 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
13101 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
13103 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
13104 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
13106 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
13107 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
13108 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
13109 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
13110 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
13112 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
13114 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
13115 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
13116 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
13117 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
13119 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
13120 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
13121 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
13123 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
13126 { MASK_PNI, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
13127 { MASK_PNI, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
13128 { MASK_PNI, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
13132 ix86_init_builtins ()
13135 ix86_init_mmx_sse_builtins ();
13138 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
13139 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
13142 ix86_init_mmx_sse_builtins ()
13144 const struct builtin_description * d;
13147 tree pchar_type_node = build_pointer_type (char_type_node);
13148 tree pcchar_type_node = build_pointer_type (
13149 build_type_variant (char_type_node, 1, 0));
13150 tree pfloat_type_node = build_pointer_type (float_type_node);
13151 tree pcfloat_type_node = build_pointer_type (
13152 build_type_variant (float_type_node, 1, 0));
13153 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
13154 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
13155 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
13158 tree int_ftype_v4sf_v4sf
13159 = build_function_type_list (integer_type_node,
13160 V4SF_type_node, V4SF_type_node, NULL_TREE);
13161 tree v4si_ftype_v4sf_v4sf
13162 = build_function_type_list (V4SI_type_node,
13163 V4SF_type_node, V4SF_type_node, NULL_TREE);
13164 /* MMX/SSE/integer conversions. */
13165 tree int_ftype_v4sf
13166 = build_function_type_list (integer_type_node,
13167 V4SF_type_node, NULL_TREE);
13168 tree int64_ftype_v4sf
13169 = build_function_type_list (long_long_integer_type_node,
13170 V4SF_type_node, NULL_TREE);
13171 tree int_ftype_v8qi
13172 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
13173 tree v4sf_ftype_v4sf_int
13174 = build_function_type_list (V4SF_type_node,
13175 V4SF_type_node, integer_type_node, NULL_TREE);
13176 tree v4sf_ftype_v4sf_int64
13177 = build_function_type_list (V4SF_type_node,
13178 V4SF_type_node, long_long_integer_type_node,
13180 tree v4sf_ftype_v4sf_v2si
13181 = build_function_type_list (V4SF_type_node,
13182 V4SF_type_node, V2SI_type_node, NULL_TREE);
13183 tree int_ftype_v4hi_int
13184 = build_function_type_list (integer_type_node,
13185 V4HI_type_node, integer_type_node, NULL_TREE);
13186 tree v4hi_ftype_v4hi_int_int
13187 = build_function_type_list (V4HI_type_node, V4HI_type_node,
13188 integer_type_node, integer_type_node,
13190 /* Miscellaneous. */
13191 tree v8qi_ftype_v4hi_v4hi
13192 = build_function_type_list (V8QI_type_node,
13193 V4HI_type_node, V4HI_type_node, NULL_TREE);
13194 tree v4hi_ftype_v2si_v2si
13195 = build_function_type_list (V4HI_type_node,
13196 V2SI_type_node, V2SI_type_node, NULL_TREE);
13197 tree v4sf_ftype_v4sf_v4sf_int
13198 = build_function_type_list (V4SF_type_node,
13199 V4SF_type_node, V4SF_type_node,
13200 integer_type_node, NULL_TREE);
13201 tree v2si_ftype_v4hi_v4hi
13202 = build_function_type_list (V2SI_type_node,
13203 V4HI_type_node, V4HI_type_node, NULL_TREE);
13204 tree v4hi_ftype_v4hi_int
13205 = build_function_type_list (V4HI_type_node,
13206 V4HI_type_node, integer_type_node, NULL_TREE);
13207 tree v4hi_ftype_v4hi_di
13208 = build_function_type_list (V4HI_type_node,
13209 V4HI_type_node, long_long_unsigned_type_node,
13211 tree v2si_ftype_v2si_di
13212 = build_function_type_list (V2SI_type_node,
13213 V2SI_type_node, long_long_unsigned_type_node,
13215 tree void_ftype_void
13216 = build_function_type (void_type_node, void_list_node);
13217 tree void_ftype_unsigned
13218 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
13219 tree void_ftype_unsigned_unsigned
13220 = build_function_type_list (void_type_node, unsigned_type_node,
13221 unsigned_type_node, NULL_TREE);
13222 tree void_ftype_pcvoid_unsigned_unsigned
13223 = build_function_type_list (void_type_node, const_ptr_type_node,
13224 unsigned_type_node, unsigned_type_node,
13226 tree unsigned_ftype_void
13227 = build_function_type (unsigned_type_node, void_list_node);
13229 = build_function_type (long_long_unsigned_type_node, void_list_node);
13230 tree v4sf_ftype_void
13231 = build_function_type (V4SF_type_node, void_list_node);
13232 tree v2si_ftype_v4sf
13233 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
13234 /* Loads/stores. */
13235 tree void_ftype_v8qi_v8qi_pchar
13236 = build_function_type_list (void_type_node,
13237 V8QI_type_node, V8QI_type_node,
13238 pchar_type_node, NULL_TREE);
13239 tree v4sf_ftype_pcfloat
13240 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
13241 /* @@@ the type is bogus */
13242 tree v4sf_ftype_v4sf_pv2si
13243 = build_function_type_list (V4SF_type_node,
13244 V4SF_type_node, pv2si_type_node, NULL_TREE);
13245 tree void_ftype_pv2si_v4sf
13246 = build_function_type_list (void_type_node,
13247 pv2si_type_node, V4SF_type_node, NULL_TREE);
13248 tree void_ftype_pfloat_v4sf
13249 = build_function_type_list (void_type_node,
13250 pfloat_type_node, V4SF_type_node, NULL_TREE);
13251 tree void_ftype_pdi_di
13252 = build_function_type_list (void_type_node,
13253 pdi_type_node, long_long_unsigned_type_node,
13255 tree void_ftype_pv2di_v2di
13256 = build_function_type_list (void_type_node,
13257 pv2di_type_node, V2DI_type_node, NULL_TREE);
13258 /* Normal vector unops. */
13259 tree v4sf_ftype_v4sf
13260 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
13262 /* Normal vector binops. */
13263 tree v4sf_ftype_v4sf_v4sf
13264 = build_function_type_list (V4SF_type_node,
13265 V4SF_type_node, V4SF_type_node, NULL_TREE);
13266 tree v8qi_ftype_v8qi_v8qi
13267 = build_function_type_list (V8QI_type_node,
13268 V8QI_type_node, V8QI_type_node, NULL_TREE);
13269 tree v4hi_ftype_v4hi_v4hi
13270 = build_function_type_list (V4HI_type_node,
13271 V4HI_type_node, V4HI_type_node, NULL_TREE);
13272 tree v2si_ftype_v2si_v2si
13273 = build_function_type_list (V2SI_type_node,
13274 V2SI_type_node, V2SI_type_node, NULL_TREE);
13275 tree di_ftype_di_di
13276 = build_function_type_list (long_long_unsigned_type_node,
13277 long_long_unsigned_type_node,
13278 long_long_unsigned_type_node, NULL_TREE);
13280 tree v2si_ftype_v2sf
13281 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
13282 tree v2sf_ftype_v2si
13283 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
13284 tree v2si_ftype_v2si
13285 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
13286 tree v2sf_ftype_v2sf
13287 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
13288 tree v2sf_ftype_v2sf_v2sf
13289 = build_function_type_list (V2SF_type_node,
13290 V2SF_type_node, V2SF_type_node, NULL_TREE);
13291 tree v2si_ftype_v2sf_v2sf
13292 = build_function_type_list (V2SI_type_node,
13293 V2SF_type_node, V2SF_type_node, NULL_TREE);
13294 tree pint_type_node = build_pointer_type (integer_type_node);
13295 tree pcint_type_node = build_pointer_type (
13296 build_type_variant (integer_type_node, 1, 0));
13297 tree pdouble_type_node = build_pointer_type (double_type_node);
13298 tree pcdouble_type_node = build_pointer_type (
13299 build_type_variant (double_type_node, 1, 0));
13300 tree int_ftype_v2df_v2df
13301 = build_function_type_list (integer_type_node,
13302 V2DF_type_node, V2DF_type_node, NULL_TREE);
13305 = build_function_type (intTI_type_node, void_list_node);
13306 tree v2di_ftype_void
13307 = build_function_type (V2DI_type_node, void_list_node);
13308 tree ti_ftype_ti_ti
13309 = build_function_type_list (intTI_type_node,
13310 intTI_type_node, intTI_type_node, NULL_TREE);
13311 tree void_ftype_pcvoid
13312 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
13314 = build_function_type_list (V2DI_type_node,
13315 long_long_unsigned_type_node, NULL_TREE);
13317 = build_function_type_list (long_long_unsigned_type_node,
13318 V2DI_type_node, NULL_TREE);
13319 tree v4sf_ftype_v4si
13320 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
13321 tree v4si_ftype_v4sf
13322 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
13323 tree v2df_ftype_v4si
13324 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
13325 tree v4si_ftype_v2df
13326 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
13327 tree v2si_ftype_v2df
13328 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
13329 tree v4sf_ftype_v2df
13330 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
13331 tree v2df_ftype_v2si
13332 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
13333 tree v2df_ftype_v4sf
13334 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
13335 tree int_ftype_v2df
13336 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
13337 tree int64_ftype_v2df
13338 = build_function_type_list (long_long_integer_type_node,
13339 V2DF_type_node, NULL_TREE);
13340 tree v2df_ftype_v2df_int
13341 = build_function_type_list (V2DF_type_node,
13342 V2DF_type_node, integer_type_node, NULL_TREE);
13343 tree v2df_ftype_v2df_int64
13344 = build_function_type_list (V2DF_type_node,
13345 V2DF_type_node, long_long_integer_type_node,
13347 tree v4sf_ftype_v4sf_v2df
13348 = build_function_type_list (V4SF_type_node,
13349 V4SF_type_node, V2DF_type_node, NULL_TREE);
13350 tree v2df_ftype_v2df_v4sf
13351 = build_function_type_list (V2DF_type_node,
13352 V2DF_type_node, V4SF_type_node, NULL_TREE);
13353 tree v2df_ftype_v2df_v2df_int
13354 = build_function_type_list (V2DF_type_node,
13355 V2DF_type_node, V2DF_type_node,
13358 tree v2df_ftype_v2df_pv2si
13359 = build_function_type_list (V2DF_type_node,
13360 V2DF_type_node, pv2si_type_node, NULL_TREE);
13361 tree void_ftype_pv2si_v2df
13362 = build_function_type_list (void_type_node,
13363 pv2si_type_node, V2DF_type_node, NULL_TREE);
13364 tree void_ftype_pdouble_v2df
13365 = build_function_type_list (void_type_node,
13366 pdouble_type_node, V2DF_type_node, NULL_TREE);
13367 tree void_ftype_pint_int
13368 = build_function_type_list (void_type_node,
13369 pint_type_node, integer_type_node, NULL_TREE);
13370 tree void_ftype_v16qi_v16qi_pchar
13371 = build_function_type_list (void_type_node,
13372 V16QI_type_node, V16QI_type_node,
13373 pchar_type_node, NULL_TREE);
13374 tree v2df_ftype_pcdouble
13375 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
13376 tree v2df_ftype_v2df_v2df
13377 = build_function_type_list (V2DF_type_node,
13378 V2DF_type_node, V2DF_type_node, NULL_TREE);
13379 tree v16qi_ftype_v16qi_v16qi
13380 = build_function_type_list (V16QI_type_node,
13381 V16QI_type_node, V16QI_type_node, NULL_TREE);
13382 tree v8hi_ftype_v8hi_v8hi
13383 = build_function_type_list (V8HI_type_node,
13384 V8HI_type_node, V8HI_type_node, NULL_TREE);
13385 tree v4si_ftype_v4si_v4si
13386 = build_function_type_list (V4SI_type_node,
13387 V4SI_type_node, V4SI_type_node, NULL_TREE);
13388 tree v2di_ftype_v2di_v2di
13389 = build_function_type_list (V2DI_type_node,
13390 V2DI_type_node, V2DI_type_node, NULL_TREE);
13391 tree v2di_ftype_v2df_v2df
13392 = build_function_type_list (V2DI_type_node,
13393 V2DF_type_node, V2DF_type_node, NULL_TREE);
13394 tree v2df_ftype_v2df
13395 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13396 tree v2df_ftype_double
13397 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
13398 tree v2df_ftype_double_double
13399 = build_function_type_list (V2DF_type_node,
13400 double_type_node, double_type_node, NULL_TREE);
13401 tree int_ftype_v8hi_int
13402 = build_function_type_list (integer_type_node,
13403 V8HI_type_node, integer_type_node, NULL_TREE);
13404 tree v8hi_ftype_v8hi_int_int
13405 = build_function_type_list (V8HI_type_node,
13406 V8HI_type_node, integer_type_node,
13407 integer_type_node, NULL_TREE);
13408 tree v2di_ftype_v2di_int
13409 = build_function_type_list (V2DI_type_node,
13410 V2DI_type_node, integer_type_node, NULL_TREE);
13411 tree v4si_ftype_v4si_int
13412 = build_function_type_list (V4SI_type_node,
13413 V4SI_type_node, integer_type_node, NULL_TREE);
13414 tree v8hi_ftype_v8hi_int
13415 = build_function_type_list (V8HI_type_node,
13416 V8HI_type_node, integer_type_node, NULL_TREE);
13417 tree v8hi_ftype_v8hi_v2di
13418 = build_function_type_list (V8HI_type_node,
13419 V8HI_type_node, V2DI_type_node, NULL_TREE);
13420 tree v4si_ftype_v4si_v2di
13421 = build_function_type_list (V4SI_type_node,
13422 V4SI_type_node, V2DI_type_node, NULL_TREE);
13423 tree v4si_ftype_v8hi_v8hi
13424 = build_function_type_list (V4SI_type_node,
13425 V8HI_type_node, V8HI_type_node, NULL_TREE);
13426 tree di_ftype_v8qi_v8qi
13427 = build_function_type_list (long_long_unsigned_type_node,
13428 V8QI_type_node, V8QI_type_node, NULL_TREE);
13429 tree v2di_ftype_v16qi_v16qi
13430 = build_function_type_list (V2DI_type_node,
13431 V16QI_type_node, V16QI_type_node, NULL_TREE);
13432 tree int_ftype_v16qi
13433 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
13434 tree v16qi_ftype_pcchar
13435 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
13436 tree void_ftype_pchar_v16qi
13437 = build_function_type_list (void_type_node,
13438 pchar_type_node, V16QI_type_node, NULL_TREE);
13439 tree v4si_ftype_pcint
13440 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
13441 tree void_ftype_pcint_v4si
13442 = build_function_type_list (void_type_node,
13443 pcint_type_node, V4SI_type_node, NULL_TREE);
13444 tree v2di_ftype_v2di
13445 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
13447 /* Add all builtins that are more or less simple operations on two
13449 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13451 /* Use one of the operands; the target can have a different mode for
13452 mask-generating compares. */
13453 enum machine_mode mode;
13458 mode = insn_data[d->icode].operand[1].mode;
13463 type = v16qi_ftype_v16qi_v16qi;
13466 type = v8hi_ftype_v8hi_v8hi;
13469 type = v4si_ftype_v4si_v4si;
13472 type = v2di_ftype_v2di_v2di;
13475 type = v2df_ftype_v2df_v2df;
13478 type = ti_ftype_ti_ti;
13481 type = v4sf_ftype_v4sf_v4sf;
13484 type = v8qi_ftype_v8qi_v8qi;
13487 type = v4hi_ftype_v4hi_v4hi;
13490 type = v2si_ftype_v2si_v2si;
13493 type = di_ftype_di_di;
13500 /* Override for comparisons. */
13501 if (d->icode == CODE_FOR_maskcmpv4sf3
13502 || d->icode == CODE_FOR_maskncmpv4sf3
13503 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13504 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
13505 type = v4si_ftype_v4sf_v4sf;
13507 if (d->icode == CODE_FOR_maskcmpv2df3
13508 || d->icode == CODE_FOR_maskncmpv2df3
13509 || d->icode == CODE_FOR_vmmaskcmpv2df3
13510 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13511 type = v2di_ftype_v2df_v2df;
13513 def_builtin (d->mask, d->name, type, d->code);
13516 /* Add the remaining MMX insns with somewhat more complicated types. */
13517 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13518 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13519 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13520 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13521 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13523 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13524 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13525 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13527 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13528 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13530 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13531 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
13533 /* comi/ucomi insns. */
13534 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13535 if (d->mask == MASK_SSE2)
13536 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13538 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
13540 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13541 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13542 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13544 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13545 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13546 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13547 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13548 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13549 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
13550 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13551 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
13552 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13553 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13554 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
13556 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13557 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13559 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13561 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13562 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13563 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13564 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13565 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13566 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13568 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13569 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13570 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13571 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13573 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13574 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13575 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13576 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13578 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13580 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13582 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13583 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13584 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13585 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13586 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13587 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13589 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13591 /* Original 3DNow! */
13592 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13593 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13594 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13595 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13596 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13597 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13598 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13599 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13600 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13601 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13602 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13603 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13604 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13605 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13606 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13607 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13608 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13609 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13610 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13611 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13613 /* 3DNow! extension as used in the Athlon CPU. */
13614 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13615 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13616 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13617 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13618 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13619 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13621 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13624 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13625 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13627 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13628 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13629 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13631 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13632 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13633 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
13634 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13635 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13636 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13638 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13639 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13640 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13641 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13643 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13644 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13645 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13646 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13647 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13649 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13650 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13651 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13652 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13654 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13655 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13657 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13659 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13660 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13662 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13663 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13664 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13665 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13666 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13668 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13670 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13671 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13672 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13673 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
13675 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13676 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13677 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13679 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13680 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
13681 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13682 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13684 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13685 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13686 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13687 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13688 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13689 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13690 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13692 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13693 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13694 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13696 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13697 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13698 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13699 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13700 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13701 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13702 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13704 def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13706 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13707 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13708 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13710 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13711 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13712 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13714 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13715 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13717 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13718 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13719 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13720 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13722 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13723 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13724 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13725 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13727 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13728 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13730 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13732 /* Prescott New Instructions. */
13733 def_builtin (MASK_PNI, "__builtin_ia32_monitor",
13734 void_ftype_pcvoid_unsigned_unsigned,
13735 IX86_BUILTIN_MONITOR);
13736 def_builtin (MASK_PNI, "__builtin_ia32_mwait",
13737 void_ftype_unsigned_unsigned,
13738 IX86_BUILTIN_MWAIT);
13739 def_builtin (MASK_PNI, "__builtin_ia32_movshdup",
13741 IX86_BUILTIN_MOVSHDUP);
13742 def_builtin (MASK_PNI, "__builtin_ia32_movsldup",
13744 IX86_BUILTIN_MOVSLDUP);
13745 def_builtin (MASK_PNI, "__builtin_ia32_lddqu",
13746 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
13747 def_builtin (MASK_PNI, "__builtin_ia32_loadddup",
13748 v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
13749 def_builtin (MASK_PNI, "__builtin_ia32_movddup",
13750 v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
13753 /* Errors in the source file can cause expand_expr to return const0_rtx
13754 where we expect a vector. To avoid crashing, use one of the vector
13755 clear instructions. */
13757 safe_vector_operand (x, mode)
13759 enum machine_mode mode;
13761 if (x != const0_rtx)
13763 x = gen_reg_rtx (mode);
13765 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13766 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13767 : gen_rtx_SUBREG (DImode, x, 0)));
13769 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13770 : gen_rtx_SUBREG (V4SFmode, x, 0),
13771 CONST0_RTX (V4SFmode)));
13775 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
13778 ix86_expand_binop_builtin (icode, arglist, target)
13779 enum insn_code icode;
13784 tree arg0 = TREE_VALUE (arglist);
13785 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13786 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13787 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13788 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13789 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13790 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13792 if (VECTOR_MODE_P (mode0))
13793 op0 = safe_vector_operand (op0, mode0);
13794 if (VECTOR_MODE_P (mode1))
13795 op1 = safe_vector_operand (op1, mode1);
13798 || GET_MODE (target) != tmode
13799 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13800 target = gen_reg_rtx (tmode);
13802 if (GET_MODE (op1) == SImode && mode1 == TImode)
13804 rtx x = gen_reg_rtx (V4SImode);
13805 emit_insn (gen_sse2_loadd (x, op1));
13806 op1 = gen_lowpart (TImode, x);
13809 /* In case the insn wants input operands in modes different from
13810 the result, abort. */
13811 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
13814 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13815 op0 = copy_to_mode_reg (mode0, op0);
13816 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13817 op1 = copy_to_mode_reg (mode1, op1);
13819 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13820 yet one of the two must not be a memory. This is normally enforced
13821 by expanders, but we didn't bother to create one here. */
13822 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13823 op0 = copy_to_mode_reg (mode0, op0);
13825 pat = GEN_FCN (icode) (target, op0, op1);
13832 /* Subroutine of ix86_expand_builtin to take care of stores. */
13835 ix86_expand_store_builtin (icode, arglist)
13836 enum insn_code icode;
13840 tree arg0 = TREE_VALUE (arglist);
13841 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13842 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13843 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13844 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13845 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13847 if (VECTOR_MODE_P (mode1))
13848 op1 = safe_vector_operand (op1, mode1);
13850 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13851 op1 = copy_to_mode_reg (mode1, op1);
13853 pat = GEN_FCN (icode) (op0, op1);
13859 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
13862 ix86_expand_unop_builtin (icode, arglist, target, do_load)
13863 enum insn_code icode;
13869 tree arg0 = TREE_VALUE (arglist);
13870 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13871 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13872 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13875 || GET_MODE (target) != tmode
13876 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13877 target = gen_reg_rtx (tmode);
13879 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13882 if (VECTOR_MODE_P (mode0))
13883 op0 = safe_vector_operand (op0, mode0);
13885 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13886 op0 = copy_to_mode_reg (mode0, op0);
13889 pat = GEN_FCN (icode) (target, op0);
13896 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13897 sqrtss, rsqrtss, rcpss. */
13900 ix86_expand_unop1_builtin (icode, arglist, target)
13901 enum insn_code icode;
13906 tree arg0 = TREE_VALUE (arglist);
13907 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13908 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13909 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13912 || GET_MODE (target) != tmode
13913 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13914 target = gen_reg_rtx (tmode);
13916 if (VECTOR_MODE_P (mode0))
13917 op0 = safe_vector_operand (op0, mode0);
13919 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13920 op0 = copy_to_mode_reg (mode0, op0);
13923 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13924 op1 = copy_to_mode_reg (mode0, op1);
13926 pat = GEN_FCN (icode) (target, op0, op1);
13933 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13936 ix86_expand_sse_compare (d, arglist, target)
13937 const struct builtin_description *d;
13942 tree arg0 = TREE_VALUE (arglist);
13943 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13944 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13945 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13947 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13948 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13949 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13950 enum rtx_code comparison = d->comparison;
13952 if (VECTOR_MODE_P (mode0))
13953 op0 = safe_vector_operand (op0, mode0);
13954 if (VECTOR_MODE_P (mode1))
13955 op1 = safe_vector_operand (op1, mode1);
13957 /* Swap operands if we have a comparison that isn't available in
13961 rtx tmp = gen_reg_rtx (mode1);
13962 emit_move_insn (tmp, op1);
13968 || GET_MODE (target) != tmode
13969 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13970 target = gen_reg_rtx (tmode);
13972 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13973 op0 = copy_to_mode_reg (mode0, op0);
13974 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13975 op1 = copy_to_mode_reg (mode1, op1);
13977 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13978 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13985 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
13988 ix86_expand_sse_comi (d, arglist, target)
13989 const struct builtin_description *d;
13994 tree arg0 = TREE_VALUE (arglist);
13995 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13996 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13997 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13999 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
14000 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
14001 enum rtx_code comparison = d->comparison;
14003 if (VECTOR_MODE_P (mode0))
14004 op0 = safe_vector_operand (op0, mode0);
14005 if (VECTOR_MODE_P (mode1))
14006 op1 = safe_vector_operand (op1, mode1);
14008 /* Swap operands if we have a comparison that isn't available in
14017 target = gen_reg_rtx (SImode);
14018 emit_move_insn (target, const0_rtx);
14019 target = gen_rtx_SUBREG (QImode, target, 0);
14021 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
14022 op0 = copy_to_mode_reg (mode0, op0);
14023 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
14024 op1 = copy_to_mode_reg (mode1, op1);
14026 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
14027 pat = GEN_FCN (d->icode) (op0, op1);
14031 emit_insn (gen_rtx_SET (VOIDmode,
14032 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
14033 gen_rtx_fmt_ee (comparison, QImode,
14037 return SUBREG_REG (target);
14040 /* Expand an expression EXP that calls a built-in function,
14041 with result going to TARGET if that's convenient
14042 (and in mode MODE if that's convenient).
14043 SUBTARGET may be used as the target for computing one of EXP's operands.
14044 IGNORE is nonzero if the value is to be ignored. */
14047 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
14050 rtx subtarget ATTRIBUTE_UNUSED;
14051 enum machine_mode mode ATTRIBUTE_UNUSED;
14052 int ignore ATTRIBUTE_UNUSED;
14054 const struct builtin_description *d;
14056 enum insn_code icode;
14057 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
14058 tree arglist = TREE_OPERAND (exp, 1);
14059 tree arg0, arg1, arg2;
14060 rtx op0, op1, op2, pat;
14061 enum machine_mode tmode, mode0, mode1, mode2;
14062 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
14066 case IX86_BUILTIN_EMMS:
14067 emit_insn (gen_emms ());
14070 case IX86_BUILTIN_SFENCE:
14071 emit_insn (gen_sfence ());
14074 case IX86_BUILTIN_PEXTRW:
14075 case IX86_BUILTIN_PEXTRW128:
14076 icode = (fcode == IX86_BUILTIN_PEXTRW
14077 ? CODE_FOR_mmx_pextrw
14078 : CODE_FOR_sse2_pextrw);
14079 arg0 = TREE_VALUE (arglist);
14080 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14081 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14082 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14083 tmode = insn_data[icode].operand[0].mode;
14084 mode0 = insn_data[icode].operand[1].mode;
14085 mode1 = insn_data[icode].operand[2].mode;
14087 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14088 op0 = copy_to_mode_reg (mode0, op0);
14089 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14091 /* @@@ better error message */
14092 error ("selector must be an immediate");
14093 return gen_reg_rtx (tmode);
14096 || GET_MODE (target) != tmode
14097 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14098 target = gen_reg_rtx (tmode);
14099 pat = GEN_FCN (icode) (target, op0, op1);
14105 case IX86_BUILTIN_PINSRW:
14106 case IX86_BUILTIN_PINSRW128:
14107 icode = (fcode == IX86_BUILTIN_PINSRW
14108 ? CODE_FOR_mmx_pinsrw
14109 : CODE_FOR_sse2_pinsrw);
14110 arg0 = TREE_VALUE (arglist);
14111 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14112 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14113 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14114 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14115 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14116 tmode = insn_data[icode].operand[0].mode;
14117 mode0 = insn_data[icode].operand[1].mode;
14118 mode1 = insn_data[icode].operand[2].mode;
14119 mode2 = insn_data[icode].operand[3].mode;
14121 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14122 op0 = copy_to_mode_reg (mode0, op0);
14123 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14124 op1 = copy_to_mode_reg (mode1, op1);
14125 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14127 /* @@@ better error message */
14128 error ("selector must be an immediate");
14132 || GET_MODE (target) != tmode
14133 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14134 target = gen_reg_rtx (tmode);
14135 pat = GEN_FCN (icode) (target, op0, op1, op2);
14141 case IX86_BUILTIN_MASKMOVQ:
14142 case IX86_BUILTIN_MASKMOVDQU:
14143 icode = (fcode == IX86_BUILTIN_MASKMOVQ
14144 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
14145 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
14146 : CODE_FOR_sse2_maskmovdqu));
14147 /* Note the arg order is different from the operand order. */
14148 arg1 = TREE_VALUE (arglist);
14149 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
14150 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14151 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14152 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14153 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14154 mode0 = insn_data[icode].operand[0].mode;
14155 mode1 = insn_data[icode].operand[1].mode;
14156 mode2 = insn_data[icode].operand[2].mode;
14158 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14159 op0 = copy_to_mode_reg (mode0, op0);
14160 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14161 op1 = copy_to_mode_reg (mode1, op1);
14162 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
14163 op2 = copy_to_mode_reg (mode2, op2);
14164 pat = GEN_FCN (icode) (op0, op1, op2);
14170 case IX86_BUILTIN_SQRTSS:
14171 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
14172 case IX86_BUILTIN_RSQRTSS:
14173 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
14174 case IX86_BUILTIN_RCPSS:
14175 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
14177 case IX86_BUILTIN_LOADAPS:
14178 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
14180 case IX86_BUILTIN_LOADUPS:
14181 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
14183 case IX86_BUILTIN_STOREAPS:
14184 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
14186 case IX86_BUILTIN_STOREUPS:
14187 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
14189 case IX86_BUILTIN_LOADSS:
14190 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
14192 case IX86_BUILTIN_STORESS:
14193 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
14195 case IX86_BUILTIN_LOADHPS:
14196 case IX86_BUILTIN_LOADLPS:
14197 case IX86_BUILTIN_LOADHPD:
14198 case IX86_BUILTIN_LOADLPD:
14199 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
14200 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
14201 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
14202 : CODE_FOR_sse2_movlpd);
14203 arg0 = TREE_VALUE (arglist);
14204 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14205 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14206 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14207 tmode = insn_data[icode].operand[0].mode;
14208 mode0 = insn_data[icode].operand[1].mode;
14209 mode1 = insn_data[icode].operand[2].mode;
14211 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14212 op0 = copy_to_mode_reg (mode0, op0);
14213 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
14215 || GET_MODE (target) != tmode
14216 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14217 target = gen_reg_rtx (tmode);
14218 pat = GEN_FCN (icode) (target, op0, op1);
14224 case IX86_BUILTIN_STOREHPS:
14225 case IX86_BUILTIN_STORELPS:
14226 case IX86_BUILTIN_STOREHPD:
14227 case IX86_BUILTIN_STORELPD:
14228 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
14229 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
14230 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
14231 : CODE_FOR_sse2_movlpd);
14232 arg0 = TREE_VALUE (arglist);
14233 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14234 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14235 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14236 mode0 = insn_data[icode].operand[1].mode;
14237 mode1 = insn_data[icode].operand[2].mode;
14239 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14240 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14241 op1 = copy_to_mode_reg (mode1, op1);
14243 pat = GEN_FCN (icode) (op0, op0, op1);
14249 case IX86_BUILTIN_MOVNTPS:
14250 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
14251 case IX86_BUILTIN_MOVNTQ:
14252 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
14254 case IX86_BUILTIN_LDMXCSR:
14255 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14256 target = assign_386_stack_local (SImode, 0);
14257 emit_move_insn (target, op0);
14258 emit_insn (gen_ldmxcsr (target));
14261 case IX86_BUILTIN_STMXCSR:
14262 target = assign_386_stack_local (SImode, 0);
14263 emit_insn (gen_stmxcsr (target));
14264 return copy_to_mode_reg (SImode, target);
14266 case IX86_BUILTIN_SHUFPS:
14267 case IX86_BUILTIN_SHUFPD:
14268 icode = (fcode == IX86_BUILTIN_SHUFPS
14269 ? CODE_FOR_sse_shufps
14270 : CODE_FOR_sse2_shufpd);
14271 arg0 = TREE_VALUE (arglist);
14272 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14273 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14274 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14275 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14276 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14277 tmode = insn_data[icode].operand[0].mode;
14278 mode0 = insn_data[icode].operand[1].mode;
14279 mode1 = insn_data[icode].operand[2].mode;
14280 mode2 = insn_data[icode].operand[3].mode;
14282 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14283 op0 = copy_to_mode_reg (mode0, op0);
14284 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14285 op1 = copy_to_mode_reg (mode1, op1);
14286 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14288 /* @@@ better error message */
14289 error ("mask must be an immediate");
14290 return gen_reg_rtx (tmode);
14293 || GET_MODE (target) != tmode
14294 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14295 target = gen_reg_rtx (tmode);
14296 pat = GEN_FCN (icode) (target, op0, op1, op2);
14302 case IX86_BUILTIN_PSHUFW:
14303 case IX86_BUILTIN_PSHUFD:
14304 case IX86_BUILTIN_PSHUFHW:
14305 case IX86_BUILTIN_PSHUFLW:
14306 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14307 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14308 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14309 : CODE_FOR_mmx_pshufw);
14310 arg0 = TREE_VALUE (arglist);
14311 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14312 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14313 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14314 tmode = insn_data[icode].operand[0].mode;
14315 mode1 = insn_data[icode].operand[1].mode;
14316 mode2 = insn_data[icode].operand[2].mode;
14318 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14319 op0 = copy_to_mode_reg (mode1, op0);
14320 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14322 /* @@@ better error message */
14323 error ("mask must be an immediate");
14327 || GET_MODE (target) != tmode
14328 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14329 target = gen_reg_rtx (tmode);
14330 pat = GEN_FCN (icode) (target, op0, op1);
14336 case IX86_BUILTIN_PSLLDQI128:
14337 case IX86_BUILTIN_PSRLDQI128:
14338 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14339 : CODE_FOR_sse2_lshrti3);
14340 arg0 = TREE_VALUE (arglist);
14341 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14342 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14343 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14344 tmode = insn_data[icode].operand[0].mode;
14345 mode1 = insn_data[icode].operand[1].mode;
14346 mode2 = insn_data[icode].operand[2].mode;
14348 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14350 op0 = copy_to_reg (op0);
14351 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14353 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14355 error ("shift must be an immediate");
14358 target = gen_reg_rtx (V2DImode);
14359 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14365 case IX86_BUILTIN_FEMMS:
14366 emit_insn (gen_femms ());
14369 case IX86_BUILTIN_PAVGUSB:
14370 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
14372 case IX86_BUILTIN_PF2ID:
14373 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
14375 case IX86_BUILTIN_PFACC:
14376 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
14378 case IX86_BUILTIN_PFADD:
14379 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
14381 case IX86_BUILTIN_PFCMPEQ:
14382 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
14384 case IX86_BUILTIN_PFCMPGE:
14385 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
14387 case IX86_BUILTIN_PFCMPGT:
14388 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
14390 case IX86_BUILTIN_PFMAX:
14391 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
14393 case IX86_BUILTIN_PFMIN:
14394 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
14396 case IX86_BUILTIN_PFMUL:
14397 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
14399 case IX86_BUILTIN_PFRCP:
14400 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
14402 case IX86_BUILTIN_PFRCPIT1:
14403 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
14405 case IX86_BUILTIN_PFRCPIT2:
14406 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
14408 case IX86_BUILTIN_PFRSQIT1:
14409 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
14411 case IX86_BUILTIN_PFRSQRT:
14412 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
14414 case IX86_BUILTIN_PFSUB:
14415 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
14417 case IX86_BUILTIN_PFSUBR:
14418 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
14420 case IX86_BUILTIN_PI2FD:
14421 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
14423 case IX86_BUILTIN_PMULHRW:
14424 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
14426 case IX86_BUILTIN_PF2IW:
14427 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
14429 case IX86_BUILTIN_PFNACC:
14430 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
14432 case IX86_BUILTIN_PFPNACC:
14433 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
14435 case IX86_BUILTIN_PI2FW:
14436 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
14438 case IX86_BUILTIN_PSWAPDSI:
14439 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
14441 case IX86_BUILTIN_PSWAPDSF:
14442 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
14444 case IX86_BUILTIN_SSE_ZERO:
14445 target = gen_reg_rtx (V4SFmode);
14446 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
14449 case IX86_BUILTIN_MMX_ZERO:
14450 target = gen_reg_rtx (DImode);
14451 emit_insn (gen_mmx_clrdi (target));
14454 case IX86_BUILTIN_CLRTI:
14455 target = gen_reg_rtx (V2DImode);
14456 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
14460 case IX86_BUILTIN_SQRTSD:
14461 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14462 case IX86_BUILTIN_LOADAPD:
14463 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14464 case IX86_BUILTIN_LOADUPD:
14465 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14467 case IX86_BUILTIN_STOREAPD:
14468 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14469 case IX86_BUILTIN_STOREUPD:
14470 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14472 case IX86_BUILTIN_LOADSD:
14473 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14475 case IX86_BUILTIN_STORESD:
14476 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
14478 case IX86_BUILTIN_SETPD1:
14479 target = assign_386_stack_local (DFmode, 0);
14480 arg0 = TREE_VALUE (arglist);
14481 emit_move_insn (adjust_address (target, DFmode, 0),
14482 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14483 op0 = gen_reg_rtx (V2DFmode);
14484 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
14485 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
14488 case IX86_BUILTIN_SETPD:
14489 target = assign_386_stack_local (V2DFmode, 0);
14490 arg0 = TREE_VALUE (arglist);
14491 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14492 emit_move_insn (adjust_address (target, DFmode, 0),
14493 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14494 emit_move_insn (adjust_address (target, DFmode, 8),
14495 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14496 op0 = gen_reg_rtx (V2DFmode);
14497 emit_insn (gen_sse2_movapd (op0, target));
14500 case IX86_BUILTIN_LOADRPD:
14501 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14502 gen_reg_rtx (V2DFmode), 1);
14503 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
14506 case IX86_BUILTIN_LOADPD1:
14507 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14508 gen_reg_rtx (V2DFmode), 1);
14509 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14512 case IX86_BUILTIN_STOREPD1:
14513 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14514 case IX86_BUILTIN_STORERPD:
14515 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14517 case IX86_BUILTIN_CLRPD:
14518 target = gen_reg_rtx (V2DFmode);
14519 emit_insn (gen_sse_clrv2df (target));
14522 case IX86_BUILTIN_MFENCE:
14523 emit_insn (gen_sse2_mfence ());
14525 case IX86_BUILTIN_LFENCE:
14526 emit_insn (gen_sse2_lfence ());
14529 case IX86_BUILTIN_CLFLUSH:
14530 arg0 = TREE_VALUE (arglist);
14531 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14532 icode = CODE_FOR_sse2_clflush;
14533 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14534 op0 = copy_to_mode_reg (Pmode, op0);
14536 emit_insn (gen_sse2_clflush (op0));
14539 case IX86_BUILTIN_MOVNTPD:
14540 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14541 case IX86_BUILTIN_MOVNTDQ:
14542 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14543 case IX86_BUILTIN_MOVNTI:
14544 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14546 case IX86_BUILTIN_LOADDQA:
14547 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14548 case IX86_BUILTIN_LOADDQU:
14549 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14550 case IX86_BUILTIN_LOADD:
14551 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14553 case IX86_BUILTIN_STOREDQA:
14554 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14555 case IX86_BUILTIN_STOREDQU:
14556 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14557 case IX86_BUILTIN_STORED:
14558 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
14560 case IX86_BUILTIN_MONITOR:
14561 arg0 = TREE_VALUE (arglist);
14562 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14563 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14564 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14565 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14566 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14568 op0 = copy_to_mode_reg (SImode, op0);
14570 op1 = copy_to_mode_reg (SImode, op1);
14572 op2 = copy_to_mode_reg (SImode, op2);
14573 emit_insn (gen_monitor (op0, op1, op2));
14576 case IX86_BUILTIN_MWAIT:
14577 arg0 = TREE_VALUE (arglist);
14578 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14579 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14580 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14582 op0 = copy_to_mode_reg (SImode, op0);
14584 op1 = copy_to_mode_reg (SImode, op1);
14585 emit_insn (gen_mwait (op0, op1));
14588 case IX86_BUILTIN_LOADDDUP:
14589 return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
14591 case IX86_BUILTIN_LDDQU:
14592 return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
14599 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14600 if (d->code == fcode)
14602 /* Compares are treated specially. */
14603 if (d->icode == CODE_FOR_maskcmpv4sf3
14604 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14605 || d->icode == CODE_FOR_maskncmpv4sf3
14606 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14607 || d->icode == CODE_FOR_maskcmpv2df3
14608 || d->icode == CODE_FOR_vmmaskcmpv2df3
14609 || d->icode == CODE_FOR_maskncmpv2df3
14610 || d->icode == CODE_FOR_vmmaskncmpv2df3)
14611 return ix86_expand_sse_compare (d, arglist, target);
14613 return ix86_expand_binop_builtin (d->icode, arglist, target);
14616 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14617 if (d->code == fcode)
14618 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14620 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14621 if (d->code == fcode)
14622 return ix86_expand_sse_comi (d, arglist, target);
14624 /* @@@ Should really do something sensible here. */
14628 /* Store OPERAND to the memory after reload is completed. This means
14629 that we can't easily use assign_stack_local. */
14631 ix86_force_to_memory (mode, operand)
14632 enum machine_mode mode;
14636 if (!reload_completed)
14638 if (TARGET_RED_ZONE)
14640 result = gen_rtx_MEM (mode,
14641 gen_rtx_PLUS (Pmode,
14643 GEN_INT (-RED_ZONE_SIZE)));
14644 emit_move_insn (result, operand);
14646 else if (!TARGET_RED_ZONE && TARGET_64BIT)
14652 operand = gen_lowpart (DImode, operand);
14656 gen_rtx_SET (VOIDmode,
14657 gen_rtx_MEM (DImode,
14658 gen_rtx_PRE_DEC (DImode,
14659 stack_pointer_rtx)),
14665 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14674 split_di (&operand, 1, operands, operands + 1);
14676 gen_rtx_SET (VOIDmode,
14677 gen_rtx_MEM (SImode,
14678 gen_rtx_PRE_DEC (Pmode,
14679 stack_pointer_rtx)),
14682 gen_rtx_SET (VOIDmode,
14683 gen_rtx_MEM (SImode,
14684 gen_rtx_PRE_DEC (Pmode,
14685 stack_pointer_rtx)),
14690 /* It is better to store HImodes as SImodes. */
14691 if (!TARGET_PARTIAL_REG_STALL)
14692 operand = gen_lowpart (SImode, operand);
14696 gen_rtx_SET (VOIDmode,
14697 gen_rtx_MEM (GET_MODE (operand),
14698 gen_rtx_PRE_DEC (SImode,
14699 stack_pointer_rtx)),
14705 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14710 /* Free operand from the memory. */
14712 ix86_free_from_memory (mode)
14713 enum machine_mode mode;
14715 if (!TARGET_RED_ZONE)
14719 if (mode == DImode || TARGET_64BIT)
14721 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14725 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14726 to pop or add instruction if registers are available. */
14727 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14728 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14733 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14734 QImode must go into class Q_REGS.
14735 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
14736 movdf to do mem-to-mem moves through integer regs. */
14738 ix86_preferred_reload_class (x, class)
14740 enum reg_class class;
14742 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14744 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14746 /* SSE can't load any constant directly yet. */
14747 if (SSE_CLASS_P (class))
14749 /* Floats can load 0 and 1. */
14750 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14752 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14753 if (MAYBE_SSE_CLASS_P (class))
14754 return (reg_class_subset_p (class, GENERAL_REGS)
14755 ? GENERAL_REGS : FLOAT_REGS);
14759 /* General regs can load everything. */
14760 if (reg_class_subset_p (class, GENERAL_REGS))
14761 return GENERAL_REGS;
14762 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14763 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14766 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14768 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14773 /* If we are copying between general and FP registers, we need a memory
14774 location. The same is true for SSE and MMX registers.
14776 The macro can't work reliably when one of the CLASSES is class containing
14777 registers from multiple units (SSE, MMX, integer). We avoid this by never
14778 combining those units in single alternative in the machine description.
14779 Ensure that this constraint holds to avoid unexpected surprises.
14781 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14782 enforce these sanity checks. */
14784 ix86_secondary_memory_needed (class1, class2, mode, strict)
14785 enum reg_class class1, class2;
14786 enum machine_mode mode;
14789 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14790 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14791 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14792 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14793 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14794 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14801 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14802 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14803 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14804 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14805 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
14807 /* Return the cost of moving data from a register in class CLASS1 to
14808 one in class CLASS2.
14810 It is not required that the cost always equal 2 when FROM is the same as TO;
14811 on some machines it is expensive to move between registers if they are not
14812 general registers. */
14814 ix86_register_move_cost (mode, class1, class2)
14815 enum machine_mode mode;
14816 enum reg_class class1, class2;
14818 /* In case we require secondary memory, compute cost of the store followed
14819 by load. In order to avoid bad register allocation choices, we need
14820 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14822 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14826 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14827 MEMORY_MOVE_COST (mode, class1, 1));
14828 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14829 MEMORY_MOVE_COST (mode, class2, 1));
14831 /* In case of copying from general_purpose_register we may emit multiple
14832 stores followed by single load causing memory size mismatch stall.
14833 Count this as arbitrarily high cost of 20. */
14834 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14837 /* In the case of FP/MMX moves, the registers actually overlap, and we
14838 have to switch modes in order to treat them differently. */
14839 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14840 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14846 /* Moves between SSE/MMX and integer unit are expensive. */
14847 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14848 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14849 return ix86_cost->mmxsse_to_integer;
14850 if (MAYBE_FLOAT_CLASS_P (class1))
14851 return ix86_cost->fp_move;
14852 if (MAYBE_SSE_CLASS_P (class1))
14853 return ix86_cost->sse_move;
14854 if (MAYBE_MMX_CLASS_P (class1))
14855 return ix86_cost->mmx_move;
14859 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14861 ix86_hard_regno_mode_ok (regno, mode)
14863 enum machine_mode mode;
14865 /* Flags and only flags can only hold CCmode values. */
14866 if (CC_REGNO_P (regno))
14867 return GET_MODE_CLASS (mode) == MODE_CC;
14868 if (GET_MODE_CLASS (mode) == MODE_CC
14869 || GET_MODE_CLASS (mode) == MODE_RANDOM
14870 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14872 if (FP_REGNO_P (regno))
14873 return VALID_FP_MODE_P (mode);
14874 if (SSE_REGNO_P (regno))
14875 return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
14876 if (MMX_REGNO_P (regno))
14878 ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
14879 /* We handle both integer and floats in the general purpose registers.
14880 In future we should be able to handle vector modes as well. */
14881 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14883 /* Take care for QImode values - they can be in non-QI regs, but then
14884 they do cause partial register stalls. */
14885 if (regno < 4 || mode != QImode || TARGET_64BIT)
14887 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14890 /* Return the cost of moving data of mode M between a
14891 register and memory. A value of 2 is the default; this cost is
14892 relative to those in `REGISTER_MOVE_COST'.
14894 If moving between registers and memory is more expensive than
14895 between two registers, you should define this macro to express the
14898 Model also increased moving costs of QImode registers in non
14902 ix86_memory_move_cost (mode, class, in)
14903 enum machine_mode mode;
14904 enum reg_class class;
14907 if (FLOAT_CLASS_P (class))
14925 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14927 if (SSE_CLASS_P (class))
14930 switch (GET_MODE_SIZE (mode))
14944 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14946 if (MMX_CLASS_P (class))
14949 switch (GET_MODE_SIZE (mode))
14960 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14962 switch (GET_MODE_SIZE (mode))
14966 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14967 : ix86_cost->movzbl_load);
14969 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14970 : ix86_cost->int_store[0] + 4);
14973 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14975 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14976 if (mode == TFmode)
14978 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14979 * (((int) GET_MODE_SIZE (mode)
14980 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
14984 /* Compute a (partial) cost for rtx X. Return true if the complete
14985 cost has been computed, and false if subexpressions should be
14986 scanned. In either case, *TOTAL contains the cost result. */
14989 ix86_rtx_costs (x, code, outer_code, total)
14991 int code, outer_code;
14994 enum machine_mode mode = GET_MODE (x);
15002 if (TARGET_64BIT && !x86_64_sign_extended_value (x))
15004 else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
15006 else if (flag_pic && SYMBOLIC_CONST (x)
15008 || (!GET_CODE (x) != LABEL_REF
15009 && (GET_CODE (x) != SYMBOL_REF
15010 || !SYMBOL_REF_LOCAL_P (x)))))
15017 if (mode == VOIDmode)
15020 switch (standard_80387_constant_p (x))
15025 default: /* Other constants */
15030 /* Start with (MEM (SYMBOL_REF)), since that's where
15031 it'll probably end up. Add a penalty for size. */
15032 *total = (COSTS_N_INSNS (1)
15033 + (flag_pic != 0 && !TARGET_64BIT)
15034 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
15040 /* The zero extensions is often completely free on x86_64, so make
15041 it as cheap as possible. */
15042 if (TARGET_64BIT && mode == DImode
15043 && GET_MODE (XEXP (x, 0)) == SImode)
15045 else if (TARGET_ZERO_EXTEND_WITH_AND)
15046 *total = COSTS_N_INSNS (ix86_cost->add);
15048 *total = COSTS_N_INSNS (ix86_cost->movzx);
15052 *total = COSTS_N_INSNS (ix86_cost->movsx);
15056 if (GET_CODE (XEXP (x, 1)) == CONST_INT
15057 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
15059 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15062 *total = COSTS_N_INSNS (ix86_cost->add);
15065 if ((value == 2 || value == 3)
15066 && !TARGET_DECOMPOSE_LEA
15067 && ix86_cost->lea <= ix86_cost->shift_const)
15069 *total = COSTS_N_INSNS (ix86_cost->lea);
15079 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
15081 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15083 if (INTVAL (XEXP (x, 1)) > 32)
15084 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
15086 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
15090 if (GET_CODE (XEXP (x, 1)) == AND)
15091 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
15093 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
15098 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15099 *total = COSTS_N_INSNS (ix86_cost->shift_const);
15101 *total = COSTS_N_INSNS (ix86_cost->shift_var);
15106 if (FLOAT_MODE_P (mode))
15107 *total = COSTS_N_INSNS (ix86_cost->fmul);
15108 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15110 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15113 for (nbits = 0; value != 0; value >>= 1)
15116 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15117 + nbits * ix86_cost->mult_bit);
15121 /* This is arbitrary */
15122 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15123 + 7 * ix86_cost->mult_bit);
15131 if (FLOAT_MODE_P (mode))
15132 *total = COSTS_N_INSNS (ix86_cost->fdiv);
15134 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
15138 if (FLOAT_MODE_P (mode))
15139 *total = COSTS_N_INSNS (ix86_cost->fadd);
15140 else if (!TARGET_DECOMPOSE_LEA
15141 && GET_MODE_CLASS (mode) == MODE_INT
15142 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
15144 if (GET_CODE (XEXP (x, 0)) == PLUS
15145 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15146 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
15147 && CONSTANT_P (XEXP (x, 1)))
15149 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
15150 if (val == 2 || val == 4 || val == 8)
15152 *total = COSTS_N_INSNS (ix86_cost->lea);
15153 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15154 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
15156 *total += rtx_cost (XEXP (x, 1), outer_code);
15160 else if (GET_CODE (XEXP (x, 0)) == MULT
15161 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
15163 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
15164 if (val == 2 || val == 4 || val == 8)
15166 *total = COSTS_N_INSNS (ix86_cost->lea);
15167 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15168 *total += rtx_cost (XEXP (x, 1), outer_code);
15172 else if (GET_CODE (XEXP (x, 0)) == PLUS)
15174 *total = COSTS_N_INSNS (ix86_cost->lea);
15175 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15176 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15177 *total += rtx_cost (XEXP (x, 1), outer_code);
15184 if (FLOAT_MODE_P (mode))
15186 *total = COSTS_N_INSNS (ix86_cost->fadd);
15194 if (!TARGET_64BIT && mode == DImode)
15196 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
15197 + (rtx_cost (XEXP (x, 0), outer_code)
15198 << (GET_MODE (XEXP (x, 0)) != DImode))
15199 + (rtx_cost (XEXP (x, 1), outer_code)
15200 << (GET_MODE (XEXP (x, 1)) != DImode)));
15206 if (FLOAT_MODE_P (mode))
15208 *total = COSTS_N_INSNS (ix86_cost->fchs);
15214 if (!TARGET_64BIT && mode == DImode)
15215 *total = COSTS_N_INSNS (ix86_cost->add * 2);
15217 *total = COSTS_N_INSNS (ix86_cost->add);
15221 if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
15226 if (FLOAT_MODE_P (mode))
15227 *total = COSTS_N_INSNS (ix86_cost->fabs);
15231 if (FLOAT_MODE_P (mode))
15232 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
15236 if (XINT (x, 1) == UNSPEC_TP)
15245 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
15247 ix86_svr3_asm_out_constructor (symbol, priority)
15249 int priority ATTRIBUTE_UNUSED;
15252 fputs ("\tpushl $", asm_out_file);
15253 assemble_name (asm_out_file, XSTR (symbol, 0));
15254 fputc ('\n', asm_out_file);
15260 static int current_machopic_label_num;
15262 /* Given a symbol name and its associated stub, write out the
15263 definition of the stub. */
15266 machopic_output_stub (file, symb, stub)
15268 const char *symb, *stub;
15270 unsigned int length;
15271 char *binder_name, *symbol_name, lazy_ptr_name[32];
15272 int label = ++current_machopic_label_num;
15274 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
15275 symb = (*targetm.strip_name_encoding) (symb);
15277 length = strlen (stub);
15278 binder_name = alloca (length + 32);
15279 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
15281 length = strlen (symb);
15282 symbol_name = alloca (length + 32);
15283 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
15285 sprintf (lazy_ptr_name, "L%d$lz", label);
15288 machopic_picsymbol_stub_section ();
15290 machopic_symbol_stub_section ();
15292 fprintf (file, "%s:\n", stub);
15293 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15297 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
15298 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
15299 fprintf (file, "\tjmp %%edx\n");
15302 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
15304 fprintf (file, "%s:\n", binder_name);
15308 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
15309 fprintf (file, "\tpushl %%eax\n");
15312 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
15314 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
15316 machopic_lazy_symbol_ptr_section ();
15317 fprintf (file, "%s:\n", lazy_ptr_name);
15318 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15319 fprintf (file, "\t.long %s\n", binder_name);
15321 #endif /* TARGET_MACHO */
15323 /* Order the registers for register allocator. */
15326 x86_order_regs_for_local_alloc ()
15331 /* First allocate the local general purpose registers. */
15332 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15333 if (GENERAL_REGNO_P (i) && call_used_regs[i])
15334 reg_alloc_order [pos++] = i;
15336 /* Global general purpose registers. */
15337 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15338 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
15339 reg_alloc_order [pos++] = i;
15341 /* x87 registers come first in case we are doing FP math
15343 if (!TARGET_SSE_MATH)
15344 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15345 reg_alloc_order [pos++] = i;
15347 /* SSE registers. */
15348 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
15349 reg_alloc_order [pos++] = i;
15350 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
15351 reg_alloc_order [pos++] = i;
15353 /* x87 registers. */
15354 if (TARGET_SSE_MATH)
15355 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15356 reg_alloc_order [pos++] = i;
15358 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
15359 reg_alloc_order [pos++] = i;
15361 /* Initialize the rest of array as we do not allocate some registers
15363 while (pos < FIRST_PSEUDO_REGISTER)
15364 reg_alloc_order [pos++] = 0;
15367 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15368 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
15371 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15372 struct attribute_spec.handler. */
15374 ix86_handle_struct_attribute (node, name, args, flags, no_add_attrs)
15377 tree args ATTRIBUTE_UNUSED;
15378 int flags ATTRIBUTE_UNUSED;
15379 bool *no_add_attrs;
15382 if (DECL_P (*node))
15384 if (TREE_CODE (*node) == TYPE_DECL)
15385 type = &TREE_TYPE (*node);
15390 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15391 || TREE_CODE (*type) == UNION_TYPE)))
15393 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
15394 *no_add_attrs = true;
15397 else if ((is_attribute_p ("ms_struct", name)
15398 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15399 || ((is_attribute_p ("gcc_struct", name)
15400 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15402 warning ("`%s' incompatible attribute ignored",
15403 IDENTIFIER_POINTER (name));
15404 *no_add_attrs = true;
15411 ix86_ms_bitfield_layout_p (record_type)
15414 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
15415 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
15416 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
15419 /* Returns an expression indicating where the this parameter is
15420 located on entry to the FUNCTION. */
15423 x86_this_parameter (function)
15426 tree type = TREE_TYPE (function);
15430 int n = aggregate_value_p (TREE_TYPE (type)) != 0;
15431 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
15434 if (ix86_fntype_regparm (type) > 0)
15438 parm = TYPE_ARG_TYPES (type);
15439 /* Figure out whether or not the function has a variable number of
15441 for (; parm; parm = TREE_CHAIN (parm))
15442 if (TREE_VALUE (parm) == void_type_node)
15444 /* If not, the this parameter is in %eax. */
15446 return gen_rtx_REG (SImode, 0);
15449 if (aggregate_value_p (TREE_TYPE (type)))
15450 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
15452 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
15455 /* Determine whether x86_output_mi_thunk can succeed. */
15458 x86_can_output_mi_thunk (thunk, delta, vcall_offset, function)
15459 tree thunk ATTRIBUTE_UNUSED;
15460 HOST_WIDE_INT delta ATTRIBUTE_UNUSED;
15461 HOST_WIDE_INT vcall_offset;
15464 /* 64-bit can handle anything. */
15468 /* For 32-bit, everything's fine if we have one free register. */
15469 if (ix86_fntype_regparm (TREE_TYPE (function)) < 3)
15472 /* Need a free register for vcall_offset. */
15476 /* Need a free register for GOT references. */
15477 if (flag_pic && !(*targetm.binds_local_p) (function))
15480 /* Otherwise ok. */
15484 /* Output the assembler code for a thunk function. THUNK_DECL is the
15485 declaration for the thunk function itself, FUNCTION is the decl for
15486 the target function. DELTA is an immediate constant offset to be
15487 added to THIS. If VCALL_OFFSET is nonzero, the word at
15488 *(*this + vcall_offset) should be added to THIS. */
15491 x86_output_mi_thunk (file, thunk, delta, vcall_offset, function)
15492 FILE *file ATTRIBUTE_UNUSED;
15493 tree thunk ATTRIBUTE_UNUSED;
15494 HOST_WIDE_INT delta;
15495 HOST_WIDE_INT vcall_offset;
15499 rtx this = x86_this_parameter (function);
15502 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15503 pull it in now and let DELTA benefit. */
15506 else if (vcall_offset)
15508 /* Put the this parameter into %eax. */
15510 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15511 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15514 this_reg = NULL_RTX;
15516 /* Adjust the this parameter by a fixed constant. */
15519 xops[0] = GEN_INT (delta);
15520 xops[1] = this_reg ? this_reg : this;
15523 if (!x86_64_general_operand (xops[0], DImode))
15525 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15527 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15531 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15534 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15537 /* Adjust the this parameter by a value stored in the vtable. */
15541 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15543 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15545 xops[0] = gen_rtx_MEM (Pmode, this_reg);
15548 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15550 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15552 /* Adjust the this parameter. */
15553 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
15554 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15556 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15557 xops[0] = GEN_INT (vcall_offset);
15559 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15560 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
15562 xops[1] = this_reg;
15564 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15566 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15569 /* If necessary, drop THIS back to its stack slot. */
15570 if (this_reg && this_reg != this)
15572 xops[0] = this_reg;
15574 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15577 xops[0] = XEXP (DECL_RTL (function), 0);
15580 if (!flag_pic || (*targetm.binds_local_p) (function))
15581 output_asm_insn ("jmp\t%P0", xops);
15584 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
15585 tmp = gen_rtx_CONST (Pmode, tmp);
15586 tmp = gen_rtx_MEM (QImode, tmp);
15588 output_asm_insn ("jmp\t%A0", xops);
15593 if (!flag_pic || (*targetm.binds_local_p) (function))
15594 output_asm_insn ("jmp\t%P0", xops);
15599 char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
15600 tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
15601 tmp = gen_rtx_MEM (QImode, tmp);
15603 output_asm_insn ("jmp\t%0", xops);
15606 #endif /* TARGET_MACHO */
15608 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15609 output_set_got (tmp);
15612 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15613 output_asm_insn ("jmp\t{*}%1", xops);
15621 default_file_start ();
15622 if (X86_FILE_START_VERSION_DIRECTIVE)
15623 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
15624 if (X86_FILE_START_FLTUSED)
15625 fputs ("\t.global\t__fltused\n", asm_out_file);
15626 if (ix86_asm_dialect == ASM_INTEL)
15627 fputs ("\t.intel_syntax\n", asm_out_file);
15631 x86_field_alignment (field, computed)
15635 enum machine_mode mode;
15636 tree type = TREE_TYPE (field);
15638 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
15640 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15641 ? get_inner_array_type (type) : type);
15642 if (mode == DFmode || mode == DCmode
15643 || GET_MODE_CLASS (mode) == MODE_INT
15644 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
15645 return MIN (32, computed);
15649 /* Output assembler code to FILE to increment profiler label # LABELNO
15650 for profiling a function entry. */
15652 x86_function_profiler (file, labelno)
15654 int labelno ATTRIBUTE_UNUSED;
15659 #ifndef NO_PROFILE_COUNTERS
15660 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
15662 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
15666 #ifndef NO_PROFILE_COUNTERS
15667 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15669 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15673 #ifndef NO_PROFILE_COUNTERS
15674 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15675 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15677 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
15681 #ifndef NO_PROFILE_COUNTERS
15682 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
15683 PROFILE_COUNT_REGISTER);
15685 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15689 /* We don't have exact information about the insn sizes, but we may assume
15690 quite safely that we are informed about all 1 byte insns and memory
15691 address sizes. This is enought to elliminate unnecesary padding in
15695 min_insn_size (insn)
15700 if (!INSN_P (insn) || !active_insn_p (insn))
15703 /* Discard alignments we've emit and jump instructions. */
15704 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
15705 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
15707 if (GET_CODE (insn) == JUMP_INSN
15708 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
15709 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
15712 /* Important case - calls are always 5 bytes.
15713 It is common to have many calls in the row. */
15714 if (GET_CODE (insn) == CALL_INSN
15715 && symbolic_reference_mentioned_p (PATTERN (insn))
15716 && !SIBLING_CALL_P (insn))
15718 if (get_attr_length (insn) <= 1)
15721 /* For normal instructions we may rely on the sizes of addresses
15722 and the presence of symbol to require 4 bytes of encoding.
15723 This is not the case for jumps where references are PC relative. */
15724 if (GET_CODE (insn) != JUMP_INSN)
15726 l = get_attr_length_address (insn);
15727 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
15736 /* AMD K8 core misspredicts jumps when there are more than 3 jumps in 16 byte
15740 k8_avoid_jump_misspredicts ()
15742 rtx insn, start = get_insns ();
15743 int nbytes = 0, njumps = 0;
15746 /* Look for all minimal intervals of instructions containing 4 jumps.
15747 The intervals are bounded by START and INSN. NBYTES is the total
15748 size of instructions in the interval including INSN and not including
15749 START. When the NBYTES is smaller than 16 bytes, it is possible
15750 that the end of START and INSN ends up in the same 16byte page.
15752 The smallest offset in the page INSN can start is the case where START
15753 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
15754 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
15756 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
15759 nbytes += min_insn_size (insn);
15761 fprintf(rtl_dump_file, "Insn %i estimated to %i bytes\n",
15762 INSN_UID (insn), min_insn_size (insn));
15763 if ((GET_CODE (insn) == JUMP_INSN
15764 && GET_CODE (PATTERN (insn)) != ADDR_VEC
15765 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
15766 || GET_CODE (insn) == CALL_INSN)
15773 start = NEXT_INSN (start);
15774 if ((GET_CODE (start) == JUMP_INSN
15775 && GET_CODE (PATTERN (start)) != ADDR_VEC
15776 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
15777 || GET_CODE (start) == CALL_INSN)
15778 njumps--, isjump = 1;
15781 nbytes -= min_insn_size (start);
15786 fprintf(rtl_dump_file, "Interval %i to %i has %i bytes\n",
15787 INSN_UID (start), INSN_UID (insn), nbytes);
15789 if (njumps == 3 && isjump && nbytes < 16)
15791 int padsize = 15 - nbytes + min_insn_size (insn);
15794 fprintf (rtl_dump_file, "Padding insn %i by %i bytes!\n", INSN_UID (insn), padsize);
15795 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
15800 /* Implement machine specific optimizations.
15801 At the moment we implement single transformation: AMD Athlon works faster
15802 when RET is not destination of conditional jump or directly preceded
15803 by other jump instruction. We avoid the penalty by inserting NOP just
15804 before the RET instructions in such cases. */
15810 if (!TARGET_ATHLON_K8 || !optimize || optimize_size)
15812 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
15814 basic_block bb = e->src;
15817 bool replace = false;
15819 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
15820 || !maybe_hot_bb_p (bb))
15822 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15823 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
15825 if (prev && GET_CODE (prev) == CODE_LABEL)
15828 for (e = bb->pred; e; e = e->pred_next)
15829 if (EDGE_FREQUENCY (e) && e->src->index >= 0
15830 && !(e->flags & EDGE_FALLTHRU))
15835 prev = prev_active_insn (ret);
15837 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
15838 || GET_CODE (prev) == CALL_INSN))
15840 /* Empty functions get branch misspredict even when the jump destination
15841 is not visible to us. */
15842 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
15847 emit_insn_before (gen_return_internal_long (), ret);
15851 k8_avoid_jump_misspredicts ();
15854 /* Return nonzero when QImode register that must be represented via REX prefix
15857 x86_extended_QIreg_mentioned_p (insn)
15861 extract_insn_cached (insn);
15862 for (i = 0; i < recog_data.n_operands; i++)
15863 if (REG_P (recog_data.operand[i])
15864 && REGNO (recog_data.operand[i]) >= 4)
15869 /* Return nonzero when P points to register encoded via REX prefix.
15870 Called via for_each_rtx. */
15872 extended_reg_mentioned_1 (p, data)
15874 void *data ATTRIBUTE_UNUSED;
15876 unsigned int regno;
15879 regno = REGNO (*p);
15880 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15883 /* Return true when INSN mentions register that must be encoded using REX
15886 x86_extended_reg_mentioned_p (insn)
15889 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15892 /* Generate an unsigned DImode to FP conversion. This is the same code
15893 optabs would emit if we didn't have TFmode patterns. */
15896 x86_emit_floatuns (operands)
15899 rtx neglab, donelab, i0, i1, f0, in, out;
15900 enum machine_mode mode;
15903 in = force_reg (DImode, operands[1]);
15904 mode = GET_MODE (out);
15905 neglab = gen_label_rtx ();
15906 donelab = gen_label_rtx ();
15907 i1 = gen_reg_rtx (Pmode);
15908 f0 = gen_reg_rtx (mode);
15910 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
15912 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15913 emit_jump_insn (gen_jump (donelab));
15916 emit_label (neglab);
15918 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15919 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15920 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15921 expand_float (f0, i0, 0);
15922 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15924 emit_label (donelab);
15927 /* Return if we do not know how to pass TYPE solely in registers. */
15929 ix86_must_pass_in_stack (mode, type)
15930 enum machine_mode mode;
15933 if (default_must_pass_in_stack (mode, type))
15935 return (!TARGET_64BIT && type && mode == TImode);
15938 #include "gt-i386.h"