1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-attr.h"
43 #include "basic-block.h"
46 #include "target-def.h"
47 #include "langhooks.h"
49 #ifndef CHECK_STACK_LIMIT
50 #define CHECK_STACK_LIMIT (-1)
53 /* Return index of given mode in mult and division cost tables. */
54 #define MODE_INDEX(mode) \
55 ((mode) == QImode ? 0 \
56 : (mode) == HImode ? 1 \
57 : (mode) == SImode ? 2 \
58 : (mode) == DImode ? 3 \
61 /* Processor costs (relative to an add) */
63 struct processor_costs size_cost = { /* costs for tunning for size */
64 2, /* cost of an add instruction */
65 3, /* cost of a lea instruction */
66 2, /* variable shift costs */
67 3, /* constant shift costs */
68 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
69 0, /* cost of multiply per each bit set */
70 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
71 3, /* cost of movsx */
72 3, /* cost of movzx */
75 2, /* cost for loading QImode using movzbl */
76 {2, 2, 2}, /* cost of loading integer registers
77 in QImode, HImode and SImode.
78 Relative to reg-reg move (2). */
79 {2, 2, 2}, /* cost of storing integer registers */
80 2, /* cost of reg,reg fld/fst */
81 {2, 2, 2}, /* cost of loading fp registers
82 in SFmode, DFmode and XFmode */
83 {2, 2, 2}, /* cost of loading integer registers */
84 3, /* cost of moving MMX register */
85 {3, 3}, /* cost of loading MMX registers
86 in SImode and DImode */
87 {3, 3}, /* cost of storing MMX registers
88 in SImode and DImode */
89 3, /* cost of moving SSE register */
90 {3, 3, 3}, /* cost of loading SSE registers
91 in SImode, DImode and TImode */
92 {3, 3, 3}, /* cost of storing SSE registers
93 in SImode, DImode and TImode */
94 3, /* MMX or SSE register to integer */
95 0, /* size of prefetch block */
96 0, /* number of parallel prefetches */
98 2, /* cost of FADD and FSUB insns. */
99 2, /* cost of FMUL instruction. */
100 2, /* cost of FDIV instruction. */
101 2, /* cost of FABS instruction. */
102 2, /* cost of FCHS instruction. */
103 2, /* cost of FSQRT instruction. */
106 /* Processor costs (relative to an add) */
108 struct processor_costs i386_cost = { /* 386 specific costs */
109 1, /* cost of an add instruction */
110 1, /* cost of a lea instruction */
111 3, /* variable shift costs */
112 2, /* constant shift costs */
113 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
114 1, /* cost of multiply per each bit set */
115 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
116 3, /* cost of movsx */
117 2, /* cost of movzx */
118 15, /* "large" insn */
120 4, /* cost for loading QImode using movzbl */
121 {2, 4, 2}, /* cost of loading integer registers
122 in QImode, HImode and SImode.
123 Relative to reg-reg move (2). */
124 {2, 4, 2}, /* cost of storing integer registers */
125 2, /* cost of reg,reg fld/fst */
126 {8, 8, 8}, /* cost of loading fp registers
127 in SFmode, DFmode and XFmode */
128 {8, 8, 8}, /* cost of loading integer registers */
129 2, /* cost of moving MMX register */
130 {4, 8}, /* cost of loading MMX registers
131 in SImode and DImode */
132 {4, 8}, /* cost of storing MMX registers
133 in SImode and DImode */
134 2, /* cost of moving SSE register */
135 {4, 8, 16}, /* cost of loading SSE registers
136 in SImode, DImode and TImode */
137 {4, 8, 16}, /* cost of storing SSE registers
138 in SImode, DImode and TImode */
139 3, /* MMX or SSE register to integer */
140 0, /* size of prefetch block */
141 0, /* number of parallel prefetches */
143 23, /* cost of FADD and FSUB insns. */
144 27, /* cost of FMUL instruction. */
145 88, /* cost of FDIV instruction. */
146 22, /* cost of FABS instruction. */
147 24, /* cost of FCHS instruction. */
148 122, /* cost of FSQRT instruction. */
152 struct processor_costs i486_cost = { /* 486 specific costs */
153 1, /* cost of an add instruction */
154 1, /* cost of a lea instruction */
155 3, /* variable shift costs */
156 2, /* constant shift costs */
157 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
158 1, /* cost of multiply per each bit set */
159 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
160 3, /* cost of movsx */
161 2, /* cost of movzx */
162 15, /* "large" insn */
164 4, /* cost for loading QImode using movzbl */
165 {2, 4, 2}, /* cost of loading integer registers
166 in QImode, HImode and SImode.
167 Relative to reg-reg move (2). */
168 {2, 4, 2}, /* cost of storing integer registers */
169 2, /* cost of reg,reg fld/fst */
170 {8, 8, 8}, /* cost of loading fp registers
171 in SFmode, DFmode and XFmode */
172 {8, 8, 8}, /* cost of loading integer registers */
173 2, /* cost of moving MMX register */
174 {4, 8}, /* cost of loading MMX registers
175 in SImode and DImode */
176 {4, 8}, /* cost of storing MMX registers
177 in SImode and DImode */
178 2, /* cost of moving SSE register */
179 {4, 8, 16}, /* cost of loading SSE registers
180 in SImode, DImode and TImode */
181 {4, 8, 16}, /* cost of storing SSE registers
182 in SImode, DImode and TImode */
183 3, /* MMX or SSE register to integer */
184 0, /* size of prefetch block */
185 0, /* number of parallel prefetches */
187 8, /* cost of FADD and FSUB insns. */
188 16, /* cost of FMUL instruction. */
189 73, /* cost of FDIV instruction. */
190 3, /* cost of FABS instruction. */
191 3, /* cost of FCHS instruction. */
192 83, /* cost of FSQRT instruction. */
196 struct processor_costs pentium_cost = {
197 1, /* cost of an add instruction */
198 1, /* cost of a lea instruction */
199 4, /* variable shift costs */
200 1, /* constant shift costs */
201 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
202 0, /* cost of multiply per each bit set */
203 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
204 3, /* cost of movsx */
205 2, /* cost of movzx */
206 8, /* "large" insn */
208 6, /* cost for loading QImode using movzbl */
209 {2, 4, 2}, /* cost of loading integer registers
210 in QImode, HImode and SImode.
211 Relative to reg-reg move (2). */
212 {2, 4, 2}, /* cost of storing integer registers */
213 2, /* cost of reg,reg fld/fst */
214 {2, 2, 6}, /* cost of loading fp registers
215 in SFmode, DFmode and XFmode */
216 {4, 4, 6}, /* cost of loading integer registers */
217 8, /* cost of moving MMX register */
218 {8, 8}, /* cost of loading MMX registers
219 in SImode and DImode */
220 {8, 8}, /* cost of storing MMX registers
221 in SImode and DImode */
222 2, /* cost of moving SSE register */
223 {4, 8, 16}, /* cost of loading SSE registers
224 in SImode, DImode and TImode */
225 {4, 8, 16}, /* cost of storing SSE registers
226 in SImode, DImode and TImode */
227 3, /* MMX or SSE register to integer */
228 0, /* size of prefetch block */
229 0, /* number of parallel prefetches */
231 3, /* cost of FADD and FSUB insns. */
232 3, /* cost of FMUL instruction. */
233 39, /* cost of FDIV instruction. */
234 1, /* cost of FABS instruction. */
235 1, /* cost of FCHS instruction. */
236 70, /* cost of FSQRT instruction. */
240 struct processor_costs pentiumpro_cost = {
241 1, /* cost of an add instruction */
242 1, /* cost of a lea instruction */
243 1, /* variable shift costs */
244 1, /* constant shift costs */
245 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
246 0, /* cost of multiply per each bit set */
247 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
248 1, /* cost of movsx */
249 1, /* cost of movzx */
250 8, /* "large" insn */
252 2, /* cost for loading QImode using movzbl */
253 {4, 4, 4}, /* cost of loading integer registers
254 in QImode, HImode and SImode.
255 Relative to reg-reg move (2). */
256 {2, 2, 2}, /* cost of storing integer registers */
257 2, /* cost of reg,reg fld/fst */
258 {2, 2, 6}, /* cost of loading fp registers
259 in SFmode, DFmode and XFmode */
260 {4, 4, 6}, /* cost of loading integer registers */
261 2, /* cost of moving MMX register */
262 {2, 2}, /* cost of loading MMX registers
263 in SImode and DImode */
264 {2, 2}, /* cost of storing MMX registers
265 in SImode and DImode */
266 2, /* cost of moving SSE register */
267 {2, 2, 8}, /* cost of loading SSE registers
268 in SImode, DImode and TImode */
269 {2, 2, 8}, /* cost of storing SSE registers
270 in SImode, DImode and TImode */
271 3, /* MMX or SSE register to integer */
272 32, /* size of prefetch block */
273 6, /* number of parallel prefetches */
275 3, /* cost of FADD and FSUB insns. */
276 5, /* cost of FMUL instruction. */
277 56, /* cost of FDIV instruction. */
278 2, /* cost of FABS instruction. */
279 2, /* cost of FCHS instruction. */
280 56, /* cost of FSQRT instruction. */
284 struct processor_costs k6_cost = {
285 1, /* cost of an add instruction */
286 2, /* cost of a lea instruction */
287 1, /* variable shift costs */
288 1, /* constant shift costs */
289 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
290 0, /* cost of multiply per each bit set */
291 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
292 2, /* cost of movsx */
293 2, /* cost of movzx */
294 8, /* "large" insn */
296 3, /* cost for loading QImode using movzbl */
297 {4, 5, 4}, /* cost of loading integer registers
298 in QImode, HImode and SImode.
299 Relative to reg-reg move (2). */
300 {2, 3, 2}, /* cost of storing integer registers */
301 4, /* cost of reg,reg fld/fst */
302 {6, 6, 6}, /* cost of loading fp registers
303 in SFmode, DFmode and XFmode */
304 {4, 4, 4}, /* cost of loading integer registers */
305 2, /* cost of moving MMX register */
306 {2, 2}, /* cost of loading MMX registers
307 in SImode and DImode */
308 {2, 2}, /* cost of storing MMX registers
309 in SImode and DImode */
310 2, /* cost of moving SSE register */
311 {2, 2, 8}, /* cost of loading SSE registers
312 in SImode, DImode and TImode */
313 {2, 2, 8}, /* cost of storing SSE registers
314 in SImode, DImode and TImode */
315 6, /* MMX or SSE register to integer */
316 32, /* size of prefetch block */
317 1, /* number of parallel prefetches */
319 2, /* cost of FADD and FSUB insns. */
320 2, /* cost of FMUL instruction. */
321 56, /* cost of FDIV instruction. */
322 2, /* cost of FABS instruction. */
323 2, /* cost of FCHS instruction. */
324 56, /* cost of FSQRT instruction. */
328 struct processor_costs athlon_cost = {
329 1, /* cost of an add instruction */
330 2, /* cost of a lea instruction */
331 1, /* variable shift costs */
332 1, /* constant shift costs */
333 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
334 0, /* cost of multiply per each bit set */
335 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
336 1, /* cost of movsx */
337 1, /* cost of movzx */
338 8, /* "large" insn */
340 4, /* cost for loading QImode using movzbl */
341 {3, 4, 3}, /* cost of loading integer registers
342 in QImode, HImode and SImode.
343 Relative to reg-reg move (2). */
344 {3, 4, 3}, /* cost of storing integer registers */
345 4, /* cost of reg,reg fld/fst */
346 {4, 4, 12}, /* cost of loading fp registers
347 in SFmode, DFmode and XFmode */
348 {6, 6, 8}, /* cost of loading integer registers */
349 2, /* cost of moving MMX register */
350 {4, 4}, /* cost of loading MMX registers
351 in SImode and DImode */
352 {4, 4}, /* cost of storing MMX registers
353 in SImode and DImode */
354 2, /* cost of moving SSE register */
355 {4, 4, 6}, /* cost of loading SSE registers
356 in SImode, DImode and TImode */
357 {4, 4, 5}, /* cost of storing SSE registers
358 in SImode, DImode and TImode */
359 5, /* MMX or SSE register to integer */
360 64, /* size of prefetch block */
361 6, /* number of parallel prefetches */
363 4, /* cost of FADD and FSUB insns. */
364 4, /* cost of FMUL instruction. */
365 24, /* cost of FDIV instruction. */
366 2, /* cost of FABS instruction. */
367 2, /* cost of FCHS instruction. */
368 35, /* cost of FSQRT instruction. */
372 struct processor_costs k8_cost = {
373 1, /* cost of an add instruction */
374 2, /* cost of a lea instruction */
375 1, /* variable shift costs */
376 1, /* constant shift costs */
377 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
378 0, /* cost of multiply per each bit set */
379 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
380 1, /* cost of movsx */
381 1, /* cost of movzx */
382 8, /* "large" insn */
384 4, /* cost for loading QImode using movzbl */
385 {3, 4, 3}, /* cost of loading integer registers
386 in QImode, HImode and SImode.
387 Relative to reg-reg move (2). */
388 {3, 4, 3}, /* cost of storing integer registers */
389 4, /* cost of reg,reg fld/fst */
390 {4, 4, 12}, /* cost of loading fp registers
391 in SFmode, DFmode and XFmode */
392 {6, 6, 8}, /* cost of loading integer registers */
393 2, /* cost of moving MMX register */
394 {3, 3}, /* cost of loading MMX registers
395 in SImode and DImode */
396 {4, 4}, /* cost of storing MMX registers
397 in SImode and DImode */
398 2, /* cost of moving SSE register */
399 {4, 3, 6}, /* cost of loading SSE registers
400 in SImode, DImode and TImode */
401 {4, 4, 5}, /* cost of storing SSE registers
402 in SImode, DImode and TImode */
403 5, /* MMX or SSE register to integer */
404 64, /* size of prefetch block */
405 6, /* number of parallel prefetches */
407 4, /* cost of FADD and FSUB insns. */
408 4, /* cost of FMUL instruction. */
409 19, /* cost of FDIV instruction. */
410 2, /* cost of FABS instruction. */
411 2, /* cost of FCHS instruction. */
412 35, /* cost of FSQRT instruction. */
416 struct processor_costs pentium4_cost = {
417 1, /* cost of an add instruction */
418 1, /* cost of a lea instruction */
419 4, /* variable shift costs */
420 4, /* constant shift costs */
421 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
422 0, /* cost of multiply per each bit set */
423 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
424 1, /* cost of movsx */
425 1, /* cost of movzx */
426 16, /* "large" insn */
428 2, /* cost for loading QImode using movzbl */
429 {4, 5, 4}, /* cost of loading integer registers
430 in QImode, HImode and SImode.
431 Relative to reg-reg move (2). */
432 {2, 3, 2}, /* cost of storing integer registers */
433 2, /* cost of reg,reg fld/fst */
434 {2, 2, 6}, /* cost of loading fp registers
435 in SFmode, DFmode and XFmode */
436 {4, 4, 6}, /* cost of loading integer registers */
437 2, /* cost of moving MMX register */
438 {2, 2}, /* cost of loading MMX registers
439 in SImode and DImode */
440 {2, 2}, /* cost of storing MMX registers
441 in SImode and DImode */
442 12, /* cost of moving SSE register */
443 {12, 12, 12}, /* cost of loading SSE registers
444 in SImode, DImode and TImode */
445 {2, 2, 8}, /* cost of storing SSE registers
446 in SImode, DImode and TImode */
447 10, /* MMX or SSE register to integer */
448 64, /* size of prefetch block */
449 6, /* number of parallel prefetches */
451 5, /* cost of FADD and FSUB insns. */
452 7, /* cost of FMUL instruction. */
453 43, /* cost of FDIV instruction. */
454 2, /* cost of FABS instruction. */
455 2, /* cost of FCHS instruction. */
456 43, /* cost of FSQRT instruction. */
459 const struct processor_costs *ix86_cost = &pentium_cost;
461 /* Processor feature/optimization bitmasks. */
462 #define m_386 (1<<PROCESSOR_I386)
463 #define m_486 (1<<PROCESSOR_I486)
464 #define m_PENT (1<<PROCESSOR_PENTIUM)
465 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
466 #define m_K6 (1<<PROCESSOR_K6)
467 #define m_ATHLON (1<<PROCESSOR_ATHLON)
468 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
469 #define m_K8 (1<<PROCESSOR_K8)
470 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
472 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
473 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
474 const int x86_zero_extend_with_and = m_486 | m_PENT;
475 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
476 const int x86_double_with_add = ~m_386;
477 const int x86_use_bit_test = m_386;
478 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
479 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
480 const int x86_3dnow_a = m_ATHLON_K8;
481 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
482 const int x86_branch_hints = m_PENT4;
483 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
484 const int x86_partial_reg_stall = m_PPRO;
485 const int x86_use_loop = m_K6;
486 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
487 const int x86_use_mov0 = m_K6;
488 const int x86_use_cltd = ~(m_PENT | m_K6);
489 const int x86_read_modify_write = ~m_PENT;
490 const int x86_read_modify = ~(m_PENT | m_PPRO);
491 const int x86_split_long_moves = m_PPRO;
492 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
493 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
494 const int x86_single_stringop = m_386 | m_PENT4;
495 const int x86_qimode_math = ~(0);
496 const int x86_promote_qi_regs = 0;
497 const int x86_himode_math = ~(m_PPRO);
498 const int x86_promote_hi_regs = m_PPRO;
499 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
500 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
501 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
502 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
503 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
504 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
505 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
506 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
507 const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
508 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
509 const int x86_decompose_lea = m_PENT4;
510 const int x86_shift1 = ~m_486;
511 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
512 const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
513 /* Set for machines where the type and dependencies are resolved on SSE register
514 parts instead of whole registers, so we may maintain just lower part of
515 scalar values in proper format leaving the upper part undefined. */
516 const int x86_sse_partial_regs = m_ATHLON_K8;
517 /* Athlon optimizes partial-register FPS special case, thus avoiding the
518 need for extra instructions beforehand */
519 const int x86_sse_partial_regs_for_cvtsd2ss = 0;
520 const int x86_sse_typeless_stores = m_ATHLON_K8;
521 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
522 const int x86_use_ffreep = m_ATHLON_K8;
523 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
524 const int x86_inter_unit_moves = ~(m_ATHLON_K8);
526 /* In case the average insn count for single function invocation is
527 lower than this constant, emit fast (but longer) prologue and
529 #define FAST_PROLOGUE_INSN_COUNT 20
531 /* Set by prologue expander and used by epilogue expander to determine
533 static int use_fast_prologue_epilogue;
535 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
536 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
537 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
538 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
540 /* Array of the smallest class containing reg number REGNO, indexed by
541 REGNO. Used by REGNO_REG_CLASS in i386.h. */
543 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
546 AREG, DREG, CREG, BREG,
548 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
550 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
551 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
554 /* flags, fpsr, dirflag, frame */
555 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
556 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
558 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
560 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
561 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
562 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
566 /* The "default" register map used in 32bit mode. */
568 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
570 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
571 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
572 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
573 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
574 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
575 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
576 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
579 static int const x86_64_int_parameter_registers[6] =
581 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
582 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
585 static int const x86_64_int_return_registers[4] =
587 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
590 /* The "default" register map used in 64bit mode. */
591 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
593 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
594 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
595 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
596 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
597 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
598 8,9,10,11,12,13,14,15, /* extended integer registers */
599 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
602 /* Define the register numbers to be used in Dwarf debugging information.
603 The SVR4 reference port C compiler uses the following register numbers
604 in its Dwarf output code:
605 0 for %eax (gcc regno = 0)
606 1 for %ecx (gcc regno = 2)
607 2 for %edx (gcc regno = 1)
608 3 for %ebx (gcc regno = 3)
609 4 for %esp (gcc regno = 7)
610 5 for %ebp (gcc regno = 6)
611 6 for %esi (gcc regno = 4)
612 7 for %edi (gcc regno = 5)
613 The following three DWARF register numbers are never generated by
614 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
615 believes these numbers have these meanings.
616 8 for %eip (no gcc equivalent)
617 9 for %eflags (gcc regno = 17)
618 10 for %trapno (no gcc equivalent)
619 It is not at all clear how we should number the FP stack registers
620 for the x86 architecture. If the version of SDB on x86/svr4 were
621 a bit less brain dead with respect to floating-point then we would
622 have a precedent to follow with respect to DWARF register numbers
623 for x86 FP registers, but the SDB on x86/svr4 is so completely
624 broken with respect to FP registers that it is hardly worth thinking
625 of it as something to strive for compatibility with.
626 The version of x86/svr4 SDB I have at the moment does (partially)
627 seem to believe that DWARF register number 11 is associated with
628 the x86 register %st(0), but that's about all. Higher DWARF
629 register numbers don't seem to be associated with anything in
630 particular, and even for DWARF regno 11, SDB only seems to under-
631 stand that it should say that a variable lives in %st(0) (when
632 asked via an `=' command) if we said it was in DWARF regno 11,
633 but SDB still prints garbage when asked for the value of the
634 variable in question (via a `/' command).
635 (Also note that the labels SDB prints for various FP stack regs
636 when doing an `x' command are all wrong.)
637 Note that these problems generally don't affect the native SVR4
638 C compiler because it doesn't allow the use of -O with -g and
639 because when it is *not* optimizing, it allocates a memory
640 location for each floating-point variable, and the memory
641 location is what gets described in the DWARF AT_location
642 attribute for the variable in question.
643 Regardless of the severe mental illness of the x86/svr4 SDB, we
644 do something sensible here and we use the following DWARF
645 register numbers. Note that these are all stack-top-relative
647 11 for %st(0) (gcc regno = 8)
648 12 for %st(1) (gcc regno = 9)
649 13 for %st(2) (gcc regno = 10)
650 14 for %st(3) (gcc regno = 11)
651 15 for %st(4) (gcc regno = 12)
652 16 for %st(5) (gcc regno = 13)
653 17 for %st(6) (gcc regno = 14)
654 18 for %st(7) (gcc regno = 15)
656 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
658 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
659 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
660 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
661 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
662 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
663 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
664 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
667 /* Test and compare insns in i386.md store the information needed to
668 generate branch and scc insns here. */
670 rtx ix86_compare_op0 = NULL_RTX;
671 rtx ix86_compare_op1 = NULL_RTX;
673 /* The encoding characters for the four TLS models present in ELF. */
675 static char const tls_model_chars[] = " GLil";
677 #define MAX_386_STACK_LOCALS 3
678 /* Size of the register save area. */
679 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
681 /* Define the structure for the machine field in struct function. */
682 struct machine_function GTY(())
684 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
685 const char *some_ld_name;
686 int save_varrargs_registers;
687 int accesses_prev_frame;
690 #define ix86_stack_locals (cfun->machine->stack_locals)
691 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
693 /* Structure describing stack frame layout.
694 Stack grows downward:
700 saved frame pointer if frame_pointer_needed
701 <- HARD_FRAME_POINTER
707 > to_allocate <- FRAME_POINTER
719 int outgoing_arguments_size;
722 HOST_WIDE_INT to_allocate;
723 /* The offsets relative to ARG_POINTER. */
724 HOST_WIDE_INT frame_pointer_offset;
725 HOST_WIDE_INT hard_frame_pointer_offset;
726 HOST_WIDE_INT stack_pointer_offset;
729 /* Used to enable/disable debugging features. */
730 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
731 /* Code model option as passed by user. */
732 const char *ix86_cmodel_string;
734 enum cmodel ix86_cmodel;
736 const char *ix86_asm_string;
737 enum asm_dialect ix86_asm_dialect = ASM_ATT;
739 const char *ix86_tls_dialect_string;
740 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
742 /* Which unit we are generating floating point math for. */
743 enum fpmath_unit ix86_fpmath;
745 /* Which cpu are we scheduling for. */
746 enum processor_type ix86_cpu;
747 /* Which instruction set architecture to use. */
748 enum processor_type ix86_arch;
750 /* Strings to hold which cpu and instruction set architecture to use. */
751 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
752 const char *ix86_arch_string; /* for -march=<xxx> */
753 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
755 /* # of registers to use to pass arguments. */
756 const char *ix86_regparm_string;
758 /* true if sse prefetch instruction is not NOOP. */
759 int x86_prefetch_sse;
761 /* ix86_regparm_string as a number */
764 /* Alignment to use for loops and jumps: */
766 /* Power of two alignment for loops. */
767 const char *ix86_align_loops_string;
769 /* Power of two alignment for non-loop jumps. */
770 const char *ix86_align_jumps_string;
772 /* Power of two alignment for stack boundary in bytes. */
773 const char *ix86_preferred_stack_boundary_string;
775 /* Preferred alignment for stack boundary in bits. */
776 int ix86_preferred_stack_boundary;
778 /* Values 1-5: see jump.c */
779 int ix86_branch_cost;
780 const char *ix86_branch_cost_string;
782 /* Power of two alignment for functions. */
783 const char *ix86_align_funcs_string;
785 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
786 static char internal_label_prefix[16];
787 static int internal_label_prefix_len;
789 static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
790 static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
791 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
792 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
794 static const char *get_some_local_dynamic_name PARAMS ((void));
795 static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
796 static rtx maybe_get_pool_constant PARAMS ((rtx));
797 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
798 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
800 static rtx get_thread_pointer PARAMS ((void));
801 static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
802 static rtx gen_push PARAMS ((rtx));
803 static int memory_address_length PARAMS ((rtx addr));
804 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
805 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
806 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
807 static void ix86_dump_ppro_packet PARAMS ((FILE *));
808 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
809 static struct machine_function * ix86_init_machine_status PARAMS ((void));
810 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
811 static int ix86_nsaved_regs PARAMS ((void));
812 static void ix86_emit_save_regs PARAMS ((void));
813 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
814 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
815 static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
816 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
817 static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
818 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
819 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
820 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
821 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
822 static int ix86_issue_rate PARAMS ((void));
823 static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
824 static void ix86_sched_init PARAMS ((FILE *, int, int));
825 static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
826 static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
827 static int ia32_use_dfa_pipeline_interface PARAMS ((void));
828 static int ia32_multipass_dfa_lookahead PARAMS ((void));
829 static void ix86_init_mmx_sse_builtins PARAMS ((void));
830 static rtx x86_this_parameter PARAMS ((tree));
831 static void x86_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
832 HOST_WIDE_INT, tree));
833 static bool x86_can_output_mi_thunk PARAMS ((tree, HOST_WIDE_INT,
834 HOST_WIDE_INT, tree));
835 bool ix86_expand_carry_flag_compare PARAMS ((enum rtx_code, rtx, rtx, rtx*));
839 rtx base, index, disp;
843 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
844 static int ix86_address_cost PARAMS ((rtx));
845 static bool ix86_cannot_force_const_mem PARAMS ((rtx));
846 static rtx ix86_delegitimize_address PARAMS ((rtx));
848 static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
849 static const char *ix86_strip_name_encoding PARAMS ((const char *))
852 struct builtin_description;
853 static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
855 static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
857 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
858 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
859 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
860 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
861 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
862 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
863 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
867 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
869 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
870 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
871 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
872 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
873 static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
874 static int ix86_save_reg PARAMS ((unsigned int, int));
875 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
876 static int ix86_comp_type_attributes PARAMS ((tree, tree));
877 static int ix86_fntype_regparm PARAMS ((tree));
878 const struct attribute_spec ix86_attribute_table[];
879 static bool ix86_function_ok_for_sibcall PARAMS ((tree, tree));
880 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
881 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
882 static int ix86_value_regno PARAMS ((enum machine_mode));
883 static bool ix86_ms_bitfield_layout_p PARAMS ((tree));
884 static tree ix86_handle_struct_attribute PARAMS ((tree *, tree, tree, int, bool *));
885 static int extended_reg_mentioned_1 PARAMS ((rtx *, void *));
886 static bool ix86_rtx_costs PARAMS ((rtx, int, int, int *));
888 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
889 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
892 /* Register class used for passing given 64bit part of the argument.
893 These represent classes as documented by the PS ABI, with the exception
894 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
895 use SF or DFmode move instead of DImode to avoid reformatting penalties.
897 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
898 whenever possible (upper half does contain padding).
900 enum x86_64_reg_class
903 X86_64_INTEGER_CLASS,
904 X86_64_INTEGERSI_CLASS,
913 static const char * const x86_64_reg_class_name[] =
914 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
916 #define MAX_CLASSES 4
917 static int classify_argument PARAMS ((enum machine_mode, tree,
918 enum x86_64_reg_class [MAX_CLASSES],
920 static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
922 static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
924 static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
925 enum x86_64_reg_class));
927 /* Initialize the GCC target structure. */
928 #undef TARGET_ATTRIBUTE_TABLE
929 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
930 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
931 # undef TARGET_MERGE_DECL_ATTRIBUTES
932 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
935 #undef TARGET_COMP_TYPE_ATTRIBUTES
936 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
938 #undef TARGET_INIT_BUILTINS
939 #define TARGET_INIT_BUILTINS ix86_init_builtins
941 #undef TARGET_EXPAND_BUILTIN
942 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
944 #undef TARGET_ASM_FUNCTION_EPILOGUE
945 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
947 #undef TARGET_ASM_OPEN_PAREN
948 #define TARGET_ASM_OPEN_PAREN ""
949 #undef TARGET_ASM_CLOSE_PAREN
950 #define TARGET_ASM_CLOSE_PAREN ""
952 #undef TARGET_ASM_ALIGNED_HI_OP
953 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
954 #undef TARGET_ASM_ALIGNED_SI_OP
955 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
957 #undef TARGET_ASM_ALIGNED_DI_OP
958 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
961 #undef TARGET_ASM_UNALIGNED_HI_OP
962 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
963 #undef TARGET_ASM_UNALIGNED_SI_OP
964 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
965 #undef TARGET_ASM_UNALIGNED_DI_OP
966 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
968 #undef TARGET_SCHED_ADJUST_COST
969 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
970 #undef TARGET_SCHED_ISSUE_RATE
971 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
972 #undef TARGET_SCHED_VARIABLE_ISSUE
973 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
974 #undef TARGET_SCHED_INIT
975 #define TARGET_SCHED_INIT ix86_sched_init
976 #undef TARGET_SCHED_REORDER
977 #define TARGET_SCHED_REORDER ix86_sched_reorder
978 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
979 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
980 ia32_use_dfa_pipeline_interface
981 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
982 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
983 ia32_multipass_dfa_lookahead
985 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
986 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
989 #undef TARGET_HAVE_TLS
990 #define TARGET_HAVE_TLS true
992 #undef TARGET_CANNOT_FORCE_CONST_MEM
993 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
995 #undef TARGET_DELEGITIMIZE_ADDRESS
996 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
998 #undef TARGET_MS_BITFIELD_LAYOUT_P
999 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1001 #undef TARGET_ASM_OUTPUT_MI_THUNK
1002 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1003 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1004 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1006 #undef TARGET_RTX_COSTS
1007 #define TARGET_RTX_COSTS ix86_rtx_costs
1008 #undef TARGET_ADDRESS_COST
1009 #define TARGET_ADDRESS_COST ix86_address_cost
1011 struct gcc_target targetm = TARGET_INITIALIZER;
1013 /* Sometimes certain combinations of command options do not make
1014 sense on a particular target machine. You can define a macro
1015 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1016 defined, is executed once just after all the command options have
1019 Don't use this macro to turn on various extra optimizations for
1020 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1026 /* Comes from final.c -- no real reason to change it. */
1027 #define MAX_CODE_ALIGN 16
1031 const struct processor_costs *cost; /* Processor costs */
1032 const int target_enable; /* Target flags to enable. */
1033 const int target_disable; /* Target flags to disable. */
1034 const int align_loop; /* Default alignments. */
1035 const int align_loop_max_skip;
1036 const int align_jump;
1037 const int align_jump_max_skip;
1038 const int align_func;
1040 const processor_target_table[PROCESSOR_max] =
1042 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1043 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1044 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1045 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1046 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1047 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1048 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1049 {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
1052 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1055 const char *const name; /* processor name or nickname. */
1056 const enum processor_type processor;
1057 const enum pta_flags
1062 PTA_PREFETCH_SSE = 8,
1068 const processor_alias_table[] =
1070 {"i386", PROCESSOR_I386, 0},
1071 {"i486", PROCESSOR_I486, 0},
1072 {"i586", PROCESSOR_PENTIUM, 0},
1073 {"pentium", PROCESSOR_PENTIUM, 0},
1074 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1075 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1076 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1077 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1078 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1079 {"i686", PROCESSOR_PENTIUMPRO, 0},
1080 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1081 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1082 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1083 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
1084 PTA_MMX | PTA_PREFETCH_SSE},
1085 {"k6", PROCESSOR_K6, PTA_MMX},
1086 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1087 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1088 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1090 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1091 | PTA_3DNOW | PTA_3DNOW_A},
1092 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1093 | PTA_3DNOW_A | PTA_SSE},
1094 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1095 | PTA_3DNOW_A | PTA_SSE},
1096 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1097 | PTA_3DNOW_A | PTA_SSE},
1098 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1099 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1102 int const pta_size = ARRAY_SIZE (processor_alias_table);
1104 /* By default our XFmode is the 80-bit extended format. If we have
1105 use TFmode instead, it's also the 80-bit format, but with padding. */
1106 real_format_for_mode[XFmode - QFmode] = &ieee_extended_intel_96_format;
1107 real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
1109 /* Set the default values for switches whose default depends on TARGET_64BIT
1110 in case they weren't overwritten by command line options. */
1113 if (flag_omit_frame_pointer == 2)
1114 flag_omit_frame_pointer = 1;
1115 if (flag_asynchronous_unwind_tables == 2)
1116 flag_asynchronous_unwind_tables = 1;
1117 if (flag_pcc_struct_return == 2)
1118 flag_pcc_struct_return = 0;
1122 if (flag_omit_frame_pointer == 2)
1123 flag_omit_frame_pointer = 0;
1124 if (flag_asynchronous_unwind_tables == 2)
1125 flag_asynchronous_unwind_tables = 0;
1126 if (flag_pcc_struct_return == 2)
1127 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1130 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1131 SUBTARGET_OVERRIDE_OPTIONS;
1134 if (!ix86_cpu_string && ix86_arch_string)
1135 ix86_cpu_string = ix86_arch_string;
1136 if (!ix86_cpu_string)
1137 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
1138 if (!ix86_arch_string)
1139 ix86_arch_string = TARGET_64BIT ? "k8" : "i386";
1141 if (ix86_cmodel_string != 0)
1143 if (!strcmp (ix86_cmodel_string, "small"))
1144 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1146 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1147 else if (!strcmp (ix86_cmodel_string, "32"))
1148 ix86_cmodel = CM_32;
1149 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1150 ix86_cmodel = CM_KERNEL;
1151 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1152 ix86_cmodel = CM_MEDIUM;
1153 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1154 ix86_cmodel = CM_LARGE;
1156 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1160 ix86_cmodel = CM_32;
1162 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1164 if (ix86_asm_string != 0)
1166 if (!strcmp (ix86_asm_string, "intel"))
1167 ix86_asm_dialect = ASM_INTEL;
1168 else if (!strcmp (ix86_asm_string, "att"))
1169 ix86_asm_dialect = ASM_ATT;
1171 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1173 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1174 error ("code model `%s' not supported in the %s bit mode",
1175 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1176 if (ix86_cmodel == CM_LARGE)
1177 sorry ("code model `large' not supported yet");
1178 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1179 sorry ("%i-bit mode not compiled in",
1180 (target_flags & MASK_64BIT) ? 64 : 32);
1182 for (i = 0; i < pta_size; i++)
1183 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1185 ix86_arch = processor_alias_table[i].processor;
1186 /* Default cpu tuning to the architecture. */
1187 ix86_cpu = ix86_arch;
1188 if (processor_alias_table[i].flags & PTA_MMX
1189 && !(target_flags_explicit & MASK_MMX))
1190 target_flags |= MASK_MMX;
1191 if (processor_alias_table[i].flags & PTA_3DNOW
1192 && !(target_flags_explicit & MASK_3DNOW))
1193 target_flags |= MASK_3DNOW;
1194 if (processor_alias_table[i].flags & PTA_3DNOW_A
1195 && !(target_flags_explicit & MASK_3DNOW_A))
1196 target_flags |= MASK_3DNOW_A;
1197 if (processor_alias_table[i].flags & PTA_SSE
1198 && !(target_flags_explicit & MASK_SSE))
1199 target_flags |= MASK_SSE;
1200 if (processor_alias_table[i].flags & PTA_SSE2
1201 && !(target_flags_explicit & MASK_SSE2))
1202 target_flags |= MASK_SSE2;
1203 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1204 x86_prefetch_sse = true;
1205 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1206 error ("CPU you selected does not support x86-64 instruction set");
1211 error ("bad value (%s) for -march= switch", ix86_arch_string);
1213 for (i = 0; i < pta_size; i++)
1214 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
1216 ix86_cpu = processor_alias_table[i].processor;
1217 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1218 error ("CPU you selected does not support x86-64 instruction set");
1221 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1222 x86_prefetch_sse = true;
1224 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
1227 ix86_cost = &size_cost;
1229 ix86_cost = processor_target_table[ix86_cpu].cost;
1230 target_flags |= processor_target_table[ix86_cpu].target_enable;
1231 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
1233 /* Arrange to set up i386_stack_locals for all functions. */
1234 init_machine_status = ix86_init_machine_status;
1236 /* Validate -mregparm= value. */
1237 if (ix86_regparm_string)
1239 i = atoi (ix86_regparm_string);
1240 if (i < 0 || i > REGPARM_MAX)
1241 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1247 ix86_regparm = REGPARM_MAX;
1249 /* If the user has provided any of the -malign-* options,
1250 warn and use that value only if -falign-* is not set.
1251 Remove this code in GCC 3.2 or later. */
1252 if (ix86_align_loops_string)
1254 warning ("-malign-loops is obsolete, use -falign-loops");
1255 if (align_loops == 0)
1257 i = atoi (ix86_align_loops_string);
1258 if (i < 0 || i > MAX_CODE_ALIGN)
1259 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1261 align_loops = 1 << i;
1265 if (ix86_align_jumps_string)
1267 warning ("-malign-jumps is obsolete, use -falign-jumps");
1268 if (align_jumps == 0)
1270 i = atoi (ix86_align_jumps_string);
1271 if (i < 0 || i > MAX_CODE_ALIGN)
1272 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1274 align_jumps = 1 << i;
1278 if (ix86_align_funcs_string)
1280 warning ("-malign-functions is obsolete, use -falign-functions");
1281 if (align_functions == 0)
1283 i = atoi (ix86_align_funcs_string);
1284 if (i < 0 || i > MAX_CODE_ALIGN)
1285 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1287 align_functions = 1 << i;
1291 /* Default align_* from the processor table. */
1292 if (align_loops == 0)
1294 align_loops = processor_target_table[ix86_cpu].align_loop;
1295 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
1297 if (align_jumps == 0)
1299 align_jumps = processor_target_table[ix86_cpu].align_jump;
1300 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
1302 if (align_functions == 0)
1304 align_functions = processor_target_table[ix86_cpu].align_func;
1307 /* Validate -mpreferred-stack-boundary= value, or provide default.
1308 The default of 128 bits is for Pentium III's SSE __m128, but we
1309 don't want additional code to keep the stack aligned when
1310 optimizing for code size. */
1311 ix86_preferred_stack_boundary = (optimize_size
1312 ? TARGET_64BIT ? 128 : 32
1314 if (ix86_preferred_stack_boundary_string)
1316 i = atoi (ix86_preferred_stack_boundary_string);
1317 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1318 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1319 TARGET_64BIT ? 4 : 2);
1321 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1324 /* Validate -mbranch-cost= value, or provide default. */
1325 ix86_branch_cost = processor_target_table[ix86_cpu].cost->branch_cost;
1326 if (ix86_branch_cost_string)
1328 i = atoi (ix86_branch_cost_string);
1330 error ("-mbranch-cost=%d is not between 0 and 5", i);
1332 ix86_branch_cost = i;
1335 if (ix86_tls_dialect_string)
1337 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1338 ix86_tls_dialect = TLS_DIALECT_GNU;
1339 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1340 ix86_tls_dialect = TLS_DIALECT_SUN;
1342 error ("bad value (%s) for -mtls-dialect= switch",
1343 ix86_tls_dialect_string);
1346 /* Keep nonleaf frame pointers. */
1347 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1348 flag_omit_frame_pointer = 1;
1350 /* If we're doing fast math, we don't care about comparison order
1351 wrt NaNs. This lets us use a shorter comparison sequence. */
1352 if (flag_unsafe_math_optimizations)
1353 target_flags &= ~MASK_IEEE_FP;
1355 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1356 since the insns won't need emulation. */
1357 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1358 target_flags &= ~MASK_NO_FANCY_MATH_387;
1362 if (TARGET_ALIGN_DOUBLE)
1363 error ("-malign-double makes no sense in the 64bit mode");
1365 error ("-mrtd calling convention not supported in the 64bit mode");
1366 /* Enable by default the SSE and MMX builtins. */
1367 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1368 ix86_fpmath = FPMATH_SSE;
1371 ix86_fpmath = FPMATH_387;
1373 if (ix86_fpmath_string != 0)
1375 if (! strcmp (ix86_fpmath_string, "387"))
1376 ix86_fpmath = FPMATH_387;
1377 else if (! strcmp (ix86_fpmath_string, "sse"))
1381 warning ("SSE instruction set disabled, using 387 arithmetics");
1382 ix86_fpmath = FPMATH_387;
1385 ix86_fpmath = FPMATH_SSE;
1387 else if (! strcmp (ix86_fpmath_string, "387,sse")
1388 || ! strcmp (ix86_fpmath_string, "sse,387"))
1392 warning ("SSE instruction set disabled, using 387 arithmetics");
1393 ix86_fpmath = FPMATH_387;
1395 else if (!TARGET_80387)
1397 warning ("387 instruction set disabled, using SSE arithmetics");
1398 ix86_fpmath = FPMATH_SSE;
1401 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1404 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1407 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1411 target_flags |= MASK_MMX;
1412 x86_prefetch_sse = true;
1415 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1418 target_flags |= MASK_MMX;
1419 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1420 extensions it adds. */
1421 if (x86_3dnow_a & (1 << ix86_arch))
1422 target_flags |= MASK_3DNOW_A;
1424 if ((x86_accumulate_outgoing_args & CPUMASK)
1425 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1427 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1429 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1432 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1433 p = strchr (internal_label_prefix, 'X');
1434 internal_label_prefix_len = p - internal_label_prefix;
1440 optimization_options (level, size)
1442 int size ATTRIBUTE_UNUSED;
1444 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1445 make the problem with not enough registers even worse. */
1446 #ifdef INSN_SCHEDULING
1448 flag_schedule_insns = 0;
1451 /* The default values of these switches depend on the TARGET_64BIT
1452 that is not known at this moment. Mark these values with 2 and
1453 let user the to override these. In case there is no command line option
1454 specifying them, we will set the defaults in override_options. */
1456 flag_omit_frame_pointer = 2;
1457 flag_pcc_struct_return = 2;
1458 flag_asynchronous_unwind_tables = 2;
1461 /* Table of valid machine attributes. */
1462 const struct attribute_spec ix86_attribute_table[] =
1464 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1465 /* Stdcall attribute says callee is responsible for popping arguments
1466 if they are not variable. */
1467 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1468 /* Fastcall attribute says callee is responsible for popping arguments
1469 if they are not variable. */
1470 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1471 /* Cdecl attribute says the callee is a normal C declaration */
1472 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1473 /* Regparm attribute specifies how many integer arguments are to be
1474 passed in registers. */
1475 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1476 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1477 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1478 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1479 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1481 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1482 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1483 { NULL, 0, 0, false, false, false, NULL }
1486 /* Decide whether we can make a sibling call to a function. DECL is the
1487 declaration of the function being targeted by the call and EXP is the
1488 CALL_EXPR representing the call. */
1491 ix86_function_ok_for_sibcall (decl, exp)
1495 /* If we are generating position-independent code, we cannot sibcall
1496 optimize any indirect call, or a direct call to a global function,
1497 as the PLT requires %ebx be live. */
1498 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1501 /* If we are returning floats on the 80387 register stack, we cannot
1502 make a sibcall from a function that doesn't return a float to a
1503 function that does or, conversely, from a function that does return
1504 a float to a function that doesn't; the necessary stack adjustment
1505 would not be executed. */
1506 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1507 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1510 /* If this call is indirect, we'll need to be able to use a call-clobbered
1511 register for the address of the target function. Make sure that all
1512 such registers are not used for passing parameters. */
1513 if (!decl && !TARGET_64BIT)
1515 int regparm = ix86_regparm;
1518 /* We're looking at the CALL_EXPR, we need the type of the function. */
1519 type = TREE_OPERAND (exp, 0); /* pointer expression */
1520 type = TREE_TYPE (type); /* pointer type */
1521 type = TREE_TYPE (type); /* function type */
1523 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1525 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1529 /* ??? Need to count the actual number of registers to be used,
1530 not the possible number of registers. Fix later. */
1535 /* Otherwise okay. That also includes certain types of indirect calls. */
1539 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1540 arguments as in struct attribute_spec.handler. */
1542 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1545 tree args ATTRIBUTE_UNUSED;
1546 int flags ATTRIBUTE_UNUSED;
1549 if (TREE_CODE (*node) != FUNCTION_TYPE
1550 && TREE_CODE (*node) != METHOD_TYPE
1551 && TREE_CODE (*node) != FIELD_DECL
1552 && TREE_CODE (*node) != TYPE_DECL)
1554 warning ("`%s' attribute only applies to functions",
1555 IDENTIFIER_POINTER (name));
1556 *no_add_attrs = true;
1560 if (is_attribute_p ("fastcall", name))
1562 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1564 error ("fastcall and stdcall attributes are not compatible");
1566 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1568 error ("fastcall and regparm attributes are not compatible");
1571 else if (is_attribute_p ("stdcall", name))
1573 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1575 error ("fastcall and stdcall attributes are not compatible");
1582 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1583 *no_add_attrs = true;
1589 /* Handle a "regparm" attribute;
1590 arguments as in struct attribute_spec.handler. */
1592 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1596 int flags ATTRIBUTE_UNUSED;
1599 if (TREE_CODE (*node) != FUNCTION_TYPE
1600 && TREE_CODE (*node) != METHOD_TYPE
1601 && TREE_CODE (*node) != FIELD_DECL
1602 && TREE_CODE (*node) != TYPE_DECL)
1604 warning ("`%s' attribute only applies to functions",
1605 IDENTIFIER_POINTER (name));
1606 *no_add_attrs = true;
1612 cst = TREE_VALUE (args);
1613 if (TREE_CODE (cst) != INTEGER_CST)
1615 warning ("`%s' attribute requires an integer constant argument",
1616 IDENTIFIER_POINTER (name));
1617 *no_add_attrs = true;
1619 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1621 warning ("argument to `%s' attribute larger than %d",
1622 IDENTIFIER_POINTER (name), REGPARM_MAX);
1623 *no_add_attrs = true;
1626 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1628 error ("fastcall and regparm attributes are not compatible");
1635 /* Return 0 if the attributes for two types are incompatible, 1 if they
1636 are compatible, and 2 if they are nearly compatible (which causes a
1637 warning to be generated). */
1640 ix86_comp_type_attributes (type1, type2)
1644 /* Check for mismatch of non-default calling convention. */
1645 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1647 if (TREE_CODE (type1) != FUNCTION_TYPE)
1650 /* Check for mismatched fastcall types */
1651 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1652 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1655 /* Check for mismatched return types (cdecl vs stdcall). */
1656 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1657 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1662 /* Return the regparm value for a fuctio with the indicated TYPE. */
1665 ix86_fntype_regparm (type)
1670 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1672 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1674 return ix86_regparm;
1677 /* Value is the number of bytes of arguments automatically
1678 popped when returning from a subroutine call.
1679 FUNDECL is the declaration node of the function (as a tree),
1680 FUNTYPE is the data type of the function (as a tree),
1681 or for a library call it is an identifier node for the subroutine name.
1682 SIZE is the number of bytes of arguments passed on the stack.
1684 On the 80386, the RTD insn may be used to pop them if the number
1685 of args is fixed, but if the number is variable then the caller
1686 must pop them all. RTD can't be used for library calls now
1687 because the library is compiled with the Unix compiler.
1688 Use of RTD is a selectable option, since it is incompatible with
1689 standard Unix calling sequences. If the option is not selected,
1690 the caller must always pop the args.
1692 The attribute stdcall is equivalent to RTD on a per module basis. */
1695 ix86_return_pops_args (fundecl, funtype, size)
1700 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1702 /* Cdecl functions override -mrtd, and never pop the stack. */
1703 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1705 /* Stdcall and fastcall functions will pop the stack if not variable args. */
1706 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1707 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1711 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1712 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1713 == void_type_node)))
1717 /* Lose any fake structure return argument if it is passed on the stack. */
1718 if (aggregate_value_p (TREE_TYPE (funtype))
1721 int nregs = ix86_fntype_regparm (funtype);
1724 return GET_MODE_SIZE (Pmode);
1730 /* Argument support functions. */
1732 /* Return true when register may be used to pass function parameters. */
1734 ix86_function_arg_regno_p (regno)
1739 return (regno < REGPARM_MAX
1740 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1741 if (SSE_REGNO_P (regno) && TARGET_SSE)
1743 /* RAX is used as hidden argument to va_arg functions. */
1746 for (i = 0; i < REGPARM_MAX; i++)
1747 if (regno == x86_64_int_parameter_registers[i])
1752 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1753 for a call to a function whose data type is FNTYPE.
1754 For a library call, FNTYPE is 0. */
1757 init_cumulative_args (cum, fntype, libname)
1758 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1759 tree fntype; /* tree ptr for function decl */
1760 rtx libname; /* SYMBOL_REF of library name or 0 */
1762 static CUMULATIVE_ARGS zero_cum;
1763 tree param, next_param;
1765 if (TARGET_DEBUG_ARG)
1767 fprintf (stderr, "\ninit_cumulative_args (");
1769 fprintf (stderr, "fntype code = %s, ret code = %s",
1770 tree_code_name[(int) TREE_CODE (fntype)],
1771 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1773 fprintf (stderr, "no fntype");
1776 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1781 /* Set up the number of registers to use for passing arguments. */
1782 cum->nregs = ix86_regparm;
1783 cum->sse_nregs = SSE_REGPARM_MAX;
1784 if (fntype && !TARGET_64BIT)
1786 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1789 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1791 cum->maybe_vaarg = false;
1793 /* Use ecx and edx registers if function has fastcall attribute */
1794 if (fntype && !TARGET_64BIT)
1796 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1804 /* Determine if this function has variable arguments. This is
1805 indicated by the last argument being 'void_type_mode' if there
1806 are no variable arguments. If there are variable arguments, then
1807 we won't pass anything in registers */
1811 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1812 param != 0; param = next_param)
1814 next_param = TREE_CHAIN (param);
1815 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1822 cum->maybe_vaarg = true;
1826 if ((!fntype && !libname)
1827 || (fntype && !TYPE_ARG_TYPES (fntype)))
1828 cum->maybe_vaarg = 1;
1830 if (TARGET_DEBUG_ARG)
1831 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1836 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
1837 of this code is to classify each 8bytes of incoming argument by the register
1838 class and assign registers accordingly. */
1840 /* Return the union class of CLASS1 and CLASS2.
1841 See the x86-64 PS ABI for details. */
1843 static enum x86_64_reg_class
1844 merge_classes (class1, class2)
1845 enum x86_64_reg_class class1, class2;
1847 /* Rule #1: If both classes are equal, this is the resulting class. */
1848 if (class1 == class2)
1851 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1853 if (class1 == X86_64_NO_CLASS)
1855 if (class2 == X86_64_NO_CLASS)
1858 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1859 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1860 return X86_64_MEMORY_CLASS;
1862 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1863 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1864 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1865 return X86_64_INTEGERSI_CLASS;
1866 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1867 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1868 return X86_64_INTEGER_CLASS;
1870 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1871 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1872 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1873 return X86_64_MEMORY_CLASS;
1875 /* Rule #6: Otherwise class SSE is used. */
1876 return X86_64_SSE_CLASS;
1879 /* Classify the argument of type TYPE and mode MODE.
1880 CLASSES will be filled by the register class used to pass each word
1881 of the operand. The number of words is returned. In case the parameter
1882 should be passed in memory, 0 is returned. As a special case for zero
1883 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1885 BIT_OFFSET is used internally for handling records and specifies offset
1886 of the offset in bits modulo 256 to avoid overflow cases.
1888 See the x86-64 PS ABI for details.
1892 classify_argument (mode, type, classes, bit_offset)
1893 enum machine_mode mode;
1895 enum x86_64_reg_class classes[MAX_CLASSES];
1899 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1900 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1902 /* Variable sized entities are always passed/returned in memory. */
1906 if (type && AGGREGATE_TYPE_P (type))
1910 enum x86_64_reg_class subclasses[MAX_CLASSES];
1912 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1916 for (i = 0; i < words; i++)
1917 classes[i] = X86_64_NO_CLASS;
1919 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1920 signalize memory class, so handle it as special case. */
1923 classes[0] = X86_64_NO_CLASS;
1927 /* Classify each field of record and merge classes. */
1928 if (TREE_CODE (type) == RECORD_TYPE)
1930 /* For classes first merge in the field of the subclasses. */
1931 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1933 tree bases = TYPE_BINFO_BASETYPES (type);
1934 int n_bases = TREE_VEC_LENGTH (bases);
1937 for (i = 0; i < n_bases; ++i)
1939 tree binfo = TREE_VEC_ELT (bases, i);
1941 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1942 tree type = BINFO_TYPE (binfo);
1944 num = classify_argument (TYPE_MODE (type),
1946 (offset + bit_offset) % 256);
1949 for (i = 0; i < num; i++)
1951 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1953 merge_classes (subclasses[i], classes[i + pos]);
1957 /* And now merge the fields of structure. */
1958 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1960 if (TREE_CODE (field) == FIELD_DECL)
1964 /* Bitfields are always classified as integer. Handle them
1965 early, since later code would consider them to be
1966 misaligned integers. */
1967 if (DECL_BIT_FIELD (field))
1969 for (i = int_bit_position (field) / 8 / 8;
1970 i < (int_bit_position (field)
1971 + tree_low_cst (DECL_SIZE (field), 0)
1974 merge_classes (X86_64_INTEGER_CLASS,
1979 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1980 TREE_TYPE (field), subclasses,
1981 (int_bit_position (field)
1982 + bit_offset) % 256);
1985 for (i = 0; i < num; i++)
1988 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
1990 merge_classes (subclasses[i], classes[i + pos]);
1996 /* Arrays are handled as small records. */
1997 else if (TREE_CODE (type) == ARRAY_TYPE)
2000 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2001 TREE_TYPE (type), subclasses, bit_offset);
2005 /* The partial classes are now full classes. */
2006 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2007 subclasses[0] = X86_64_SSE_CLASS;
2008 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2009 subclasses[0] = X86_64_INTEGER_CLASS;
2011 for (i = 0; i < words; i++)
2012 classes[i] = subclasses[i % num];
2014 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2015 else if (TREE_CODE (type) == UNION_TYPE
2016 || TREE_CODE (type) == QUAL_UNION_TYPE)
2018 /* For classes first merge in the field of the subclasses. */
2019 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2021 tree bases = TYPE_BINFO_BASETYPES (type);
2022 int n_bases = TREE_VEC_LENGTH (bases);
2025 for (i = 0; i < n_bases; ++i)
2027 tree binfo = TREE_VEC_ELT (bases, i);
2029 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2030 tree type = BINFO_TYPE (binfo);
2032 num = classify_argument (TYPE_MODE (type),
2034 (offset + (bit_offset % 64)) % 256);
2037 for (i = 0; i < num; i++)
2039 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2041 merge_classes (subclasses[i], classes[i + pos]);
2045 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2047 if (TREE_CODE (field) == FIELD_DECL)
2050 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2051 TREE_TYPE (field), subclasses,
2055 for (i = 0; i < num; i++)
2056 classes[i] = merge_classes (subclasses[i], classes[i]);
2063 /* Final merger cleanup. */
2064 for (i = 0; i < words; i++)
2066 /* If one class is MEMORY, everything should be passed in
2068 if (classes[i] == X86_64_MEMORY_CLASS)
2071 /* The X86_64_SSEUP_CLASS should be always preceded by
2072 X86_64_SSE_CLASS. */
2073 if (classes[i] == X86_64_SSEUP_CLASS
2074 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2075 classes[i] = X86_64_SSE_CLASS;
2077 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2078 if (classes[i] == X86_64_X87UP_CLASS
2079 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2080 classes[i] = X86_64_SSE_CLASS;
2085 /* Compute alignment needed. We align all types to natural boundaries with
2086 exception of XFmode that is aligned to 64bits. */
2087 if (mode != VOIDmode && mode != BLKmode)
2089 int mode_alignment = GET_MODE_BITSIZE (mode);
2092 mode_alignment = 128;
2093 else if (mode == XCmode)
2094 mode_alignment = 256;
2095 /* Misaligned fields are always returned in memory. */
2096 if (bit_offset % mode_alignment)
2100 /* Classification of atomic types. */
2110 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2111 classes[0] = X86_64_INTEGERSI_CLASS;
2113 classes[0] = X86_64_INTEGER_CLASS;
2117 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2120 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2121 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2124 if (!(bit_offset % 64))
2125 classes[0] = X86_64_SSESF_CLASS;
2127 classes[0] = X86_64_SSE_CLASS;
2130 classes[0] = X86_64_SSEDF_CLASS;
2133 classes[0] = X86_64_X87_CLASS;
2134 classes[1] = X86_64_X87UP_CLASS;
2137 classes[0] = X86_64_X87_CLASS;
2138 classes[1] = X86_64_X87UP_CLASS;
2139 classes[2] = X86_64_X87_CLASS;
2140 classes[3] = X86_64_X87UP_CLASS;
2143 classes[0] = X86_64_SSEDF_CLASS;
2144 classes[1] = X86_64_SSEDF_CLASS;
2147 classes[0] = X86_64_SSE_CLASS;
2155 classes[0] = X86_64_SSE_CLASS;
2156 classes[1] = X86_64_SSEUP_CLASS;
2171 /* Examine the argument and return set number of register required in each
2172 class. Return 0 iff parameter should be passed in memory. */
2174 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
2175 enum machine_mode mode;
2177 int *int_nregs, *sse_nregs;
2180 enum x86_64_reg_class class[MAX_CLASSES];
2181 int n = classify_argument (mode, type, class, 0);
2187 for (n--; n >= 0; n--)
2190 case X86_64_INTEGER_CLASS:
2191 case X86_64_INTEGERSI_CLASS:
2194 case X86_64_SSE_CLASS:
2195 case X86_64_SSESF_CLASS:
2196 case X86_64_SSEDF_CLASS:
2199 case X86_64_NO_CLASS:
2200 case X86_64_SSEUP_CLASS:
2202 case X86_64_X87_CLASS:
2203 case X86_64_X87UP_CLASS:
2207 case X86_64_MEMORY_CLASS:
2212 /* Construct container for the argument used by GCC interface. See
2213 FUNCTION_ARG for the detailed description. */
2215 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
2216 enum machine_mode mode;
2219 int nintregs, nsseregs;
2223 enum machine_mode tmpmode;
2225 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2226 enum x86_64_reg_class class[MAX_CLASSES];
2230 int needed_sseregs, needed_intregs;
2231 rtx exp[MAX_CLASSES];
2234 n = classify_argument (mode, type, class, 0);
2235 if (TARGET_DEBUG_ARG)
2238 fprintf (stderr, "Memory class\n");
2241 fprintf (stderr, "Classes:");
2242 for (i = 0; i < n; i++)
2244 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2246 fprintf (stderr, "\n");
2251 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2253 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2256 /* First construct simple cases. Avoid SCmode, since we want to use
2257 single register to pass this type. */
2258 if (n == 1 && mode != SCmode)
2261 case X86_64_INTEGER_CLASS:
2262 case X86_64_INTEGERSI_CLASS:
2263 return gen_rtx_REG (mode, intreg[0]);
2264 case X86_64_SSE_CLASS:
2265 case X86_64_SSESF_CLASS:
2266 case X86_64_SSEDF_CLASS:
2267 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2268 case X86_64_X87_CLASS:
2269 return gen_rtx_REG (mode, FIRST_STACK_REG);
2270 case X86_64_NO_CLASS:
2271 /* Zero sized array, struct or class. */
2276 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
2277 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2279 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2280 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
2281 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2282 && class[1] == X86_64_INTEGER_CLASS
2283 && (mode == CDImode || mode == TImode)
2284 && intreg[0] + 1 == intreg[1])
2285 return gen_rtx_REG (mode, intreg[0]);
2287 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2288 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2289 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
2291 /* Otherwise figure out the entries of the PARALLEL. */
2292 for (i = 0; i < n; i++)
2296 case X86_64_NO_CLASS:
2298 case X86_64_INTEGER_CLASS:
2299 case X86_64_INTEGERSI_CLASS:
2300 /* Merge TImodes on aligned occasions here too. */
2301 if (i * 8 + 8 > bytes)
2302 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2303 else if (class[i] == X86_64_INTEGERSI_CLASS)
2307 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2308 if (tmpmode == BLKmode)
2310 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2311 gen_rtx_REG (tmpmode, *intreg),
2315 case X86_64_SSESF_CLASS:
2316 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2317 gen_rtx_REG (SFmode,
2318 SSE_REGNO (sse_regno)),
2322 case X86_64_SSEDF_CLASS:
2323 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2324 gen_rtx_REG (DFmode,
2325 SSE_REGNO (sse_regno)),
2329 case X86_64_SSE_CLASS:
2330 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2334 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2335 gen_rtx_REG (tmpmode,
2336 SSE_REGNO (sse_regno)),
2338 if (tmpmode == TImode)
2346 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2347 for (i = 0; i < nexps; i++)
2348 XVECEXP (ret, 0, i) = exp [i];
2352 /* Update the data in CUM to advance over an argument
2353 of mode MODE and data type TYPE.
2354 (TYPE is null for libcalls where that information may not be available.) */
2357 function_arg_advance (cum, mode, type, named)
2358 CUMULATIVE_ARGS *cum; /* current arg information */
2359 enum machine_mode mode; /* current arg mode */
2360 tree type; /* type of the argument or 0 if lib support */
2361 int named; /* whether or not the argument was named */
2364 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2365 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2367 if (TARGET_DEBUG_ARG)
2369 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2370 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2373 int int_nregs, sse_nregs;
2374 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2375 cum->words += words;
2376 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2378 cum->nregs -= int_nregs;
2379 cum->sse_nregs -= sse_nregs;
2380 cum->regno += int_nregs;
2381 cum->sse_regno += sse_nregs;
2384 cum->words += words;
2388 if (TARGET_SSE && mode == TImode)
2390 cum->sse_words += words;
2391 cum->sse_nregs -= 1;
2392 cum->sse_regno += 1;
2393 if (cum->sse_nregs <= 0)
2401 cum->words += words;
2402 cum->nregs -= words;
2403 cum->regno += words;
2405 if (cum->nregs <= 0)
2415 /* Define where to put the arguments to a function.
2416 Value is zero to push the argument on the stack,
2417 or a hard register in which to store the argument.
2419 MODE is the argument's machine mode.
2420 TYPE is the data type of the argument (as a tree).
2421 This is null for libcalls where that information may
2423 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2424 the preceding args and about the function being called.
2425 NAMED is nonzero if this argument is a named parameter
2426 (otherwise it is an extra parameter matching an ellipsis). */
2429 function_arg (cum, mode, type, named)
2430 CUMULATIVE_ARGS *cum; /* current arg information */
2431 enum machine_mode mode; /* current arg mode */
2432 tree type; /* type of the argument or 0 if lib support */
2433 int named; /* != 0 for normal args, == 0 for ... args */
2437 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2438 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2440 /* Handle a hidden AL argument containing number of registers for varargs
2441 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2443 if (mode == VOIDmode)
2446 return GEN_INT (cum->maybe_vaarg
2447 ? (cum->sse_nregs < 0
2455 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2456 &x86_64_int_parameter_registers [cum->regno],
2461 /* For now, pass fp/complex values on the stack. */
2470 if (words <= cum->nregs)
2472 int regno = cum->regno;
2474 /* Fastcall allocates the first two DWORD (SImode) or
2475 smaller arguments to ECX and EDX. */
2478 if (mode == BLKmode || mode == DImode)
2481 /* ECX not EAX is the first allocated register. */
2485 ret = gen_rtx_REG (mode, regno);
2490 ret = gen_rtx_REG (mode, cum->sse_regno);
2494 if (TARGET_DEBUG_ARG)
2497 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2498 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2501 print_simple_rtl (stderr, ret);
2503 fprintf (stderr, ", stack");
2505 fprintf (stderr, " )\n");
2511 /* A C expression that indicates when an argument must be passed by
2512 reference. If nonzero for an argument, a copy of that argument is
2513 made in memory and a pointer to the argument is passed instead of
2514 the argument itself. The pointer is passed in whatever way is
2515 appropriate for passing a pointer to that type. */
2518 function_arg_pass_by_reference (cum, mode, type, named)
2519 CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
2520 enum machine_mode mode ATTRIBUTE_UNUSED;
2522 int named ATTRIBUTE_UNUSED;
2527 if (type && int_size_in_bytes (type) == -1)
2529 if (TARGET_DEBUG_ARG)
2530 fprintf (stderr, "function_arg_pass_by_reference\n");
2537 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2541 ix86_function_arg_boundary (mode, type)
2542 enum machine_mode mode;
2547 return PARM_BOUNDARY;
2549 align = TYPE_ALIGN (type);
2551 align = GET_MODE_ALIGNMENT (mode);
2552 if (align < PARM_BOUNDARY)
2553 align = PARM_BOUNDARY;
2559 /* Return true if N is a possible register number of function value. */
2561 ix86_function_value_regno_p (regno)
2566 return ((regno) == 0
2567 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2568 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2570 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2571 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2572 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2575 /* Define how to find the value returned by a function.
2576 VALTYPE is the data type of the value (as a tree).
2577 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2578 otherwise, FUNC is 0. */
2580 ix86_function_value (valtype)
2585 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2586 REGPARM_MAX, SSE_REGPARM_MAX,
2587 x86_64_int_return_registers, 0);
2588 /* For zero sized structures, construct_container return NULL, but we need
2589 to keep rest of compiler happy by returning meaningful value. */
2591 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2595 return gen_rtx_REG (TYPE_MODE (valtype),
2596 ix86_value_regno (TYPE_MODE (valtype)));
2599 /* Return false iff type is returned in memory. */
2601 ix86_return_in_memory (type)
2604 int needed_intregs, needed_sseregs;
2607 return !examine_argument (TYPE_MODE (type), type, 1,
2608 &needed_intregs, &needed_sseregs);
2612 if (TYPE_MODE (type) == BLKmode)
2614 else if (MS_AGGREGATE_RETURN
2615 && AGGREGATE_TYPE_P (type)
2616 && int_size_in_bytes(type) <= 8)
2618 else if ((VECTOR_MODE_P (TYPE_MODE (type))
2619 && int_size_in_bytes (type) == 8)
2620 || (int_size_in_bytes (type) > 12
2621 && TYPE_MODE (type) != TImode
2622 && TYPE_MODE (type) != TFmode
2623 && !VECTOR_MODE_P (TYPE_MODE (type))))
2629 /* Define how to find the value returned by a library function
2630 assuming the value has mode MODE. */
2632 ix86_libcall_value (mode)
2633 enum machine_mode mode;
2643 return gen_rtx_REG (mode, FIRST_SSE_REG);
2646 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2648 return gen_rtx_REG (mode, 0);
2652 return gen_rtx_REG (mode, ix86_value_regno (mode));
2655 /* Given a mode, return the register to use for a return value. */
2658 ix86_value_regno (mode)
2659 enum machine_mode mode;
2661 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2662 return FIRST_FLOAT_REG;
2663 if (mode == TImode || VECTOR_MODE_P (mode))
2664 return FIRST_SSE_REG;
2668 /* Create the va_list data type. */
2671 ix86_build_va_list ()
2673 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2675 /* For i386 we use plain pointer to argument area. */
2677 return build_pointer_type (char_type_node);
2679 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2680 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2682 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2683 unsigned_type_node);
2684 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2685 unsigned_type_node);
2686 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2688 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2691 DECL_FIELD_CONTEXT (f_gpr) = record;
2692 DECL_FIELD_CONTEXT (f_fpr) = record;
2693 DECL_FIELD_CONTEXT (f_ovf) = record;
2694 DECL_FIELD_CONTEXT (f_sav) = record;
2696 TREE_CHAIN (record) = type_decl;
2697 TYPE_NAME (record) = type_decl;
2698 TYPE_FIELDS (record) = f_gpr;
2699 TREE_CHAIN (f_gpr) = f_fpr;
2700 TREE_CHAIN (f_fpr) = f_ovf;
2701 TREE_CHAIN (f_ovf) = f_sav;
2703 layout_type (record);
2705 /* The correct type is an array type of one element. */
2706 return build_array_type (record, build_index_type (size_zero_node));
2709 /* Perform any needed actions needed for a function that is receiving a
2710 variable number of arguments.
2714 MODE and TYPE are the mode and type of the current parameter.
2716 PRETEND_SIZE is a variable that should be set to the amount of stack
2717 that must be pushed by the prolog to pretend that our caller pushed
2720 Normally, this macro will push all remaining incoming registers on the
2721 stack and set PRETEND_SIZE to the length of the registers pushed. */
2724 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2725 CUMULATIVE_ARGS *cum;
2726 enum machine_mode mode;
2728 int *pretend_size ATTRIBUTE_UNUSED;
2732 CUMULATIVE_ARGS next_cum;
2733 rtx save_area = NULL_RTX, mem;
2746 /* Indicate to allocate space on the stack for varargs save area. */
2747 ix86_save_varrargs_registers = 1;
2749 fntype = TREE_TYPE (current_function_decl);
2750 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2751 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2752 != void_type_node));
2754 /* For varargs, we do not want to skip the dummy va_dcl argument.
2755 For stdargs, we do want to skip the last named argument. */
2758 function_arg_advance (&next_cum, mode, type, 1);
2761 save_area = frame_pointer_rtx;
2763 set = get_varargs_alias_set ();
2765 for (i = next_cum.regno; i < ix86_regparm; i++)
2767 mem = gen_rtx_MEM (Pmode,
2768 plus_constant (save_area, i * UNITS_PER_WORD));
2769 set_mem_alias_set (mem, set);
2770 emit_move_insn (mem, gen_rtx_REG (Pmode,
2771 x86_64_int_parameter_registers[i]));
2774 if (next_cum.sse_nregs)
2776 /* Now emit code to save SSE registers. The AX parameter contains number
2777 of SSE parameter registers used to call this function. We use
2778 sse_prologue_save insn template that produces computed jump across
2779 SSE saves. We need some preparation work to get this working. */
2781 label = gen_label_rtx ();
2782 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2784 /* Compute address to jump to :
2785 label - 5*eax + nnamed_sse_arguments*5 */
2786 tmp_reg = gen_reg_rtx (Pmode);
2787 nsse_reg = gen_reg_rtx (Pmode);
2788 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2789 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2790 gen_rtx_MULT (Pmode, nsse_reg,
2792 if (next_cum.sse_regno)
2795 gen_rtx_CONST (DImode,
2796 gen_rtx_PLUS (DImode,
2798 GEN_INT (next_cum.sse_regno * 4))));
2800 emit_move_insn (nsse_reg, label_ref);
2801 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2803 /* Compute address of memory block we save into. We always use pointer
2804 pointing 127 bytes after first byte to store - this is needed to keep
2805 instruction size limited by 4 bytes. */
2806 tmp_reg = gen_reg_rtx (Pmode);
2807 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2808 plus_constant (save_area,
2809 8 * REGPARM_MAX + 127)));
2810 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2811 set_mem_alias_set (mem, set);
2812 set_mem_align (mem, BITS_PER_WORD);
2814 /* And finally do the dirty job! */
2815 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2816 GEN_INT (next_cum.sse_regno), label));
2821 /* Implement va_start. */
2824 ix86_va_start (valist, nextarg)
2828 HOST_WIDE_INT words, n_gpr, n_fpr;
2829 tree f_gpr, f_fpr, f_ovf, f_sav;
2830 tree gpr, fpr, ovf, sav, t;
2832 /* Only 64bit target needs something special. */
2835 std_expand_builtin_va_start (valist, nextarg);
2839 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2840 f_fpr = TREE_CHAIN (f_gpr);
2841 f_ovf = TREE_CHAIN (f_fpr);
2842 f_sav = TREE_CHAIN (f_ovf);
2844 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2845 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2846 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2847 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2848 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2850 /* Count number of gp and fp argument registers used. */
2851 words = current_function_args_info.words;
2852 n_gpr = current_function_args_info.regno;
2853 n_fpr = current_function_args_info.sse_regno;
2855 if (TARGET_DEBUG_ARG)
2856 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2857 (int) words, (int) n_gpr, (int) n_fpr);
2859 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2860 build_int_2 (n_gpr * 8, 0));
2861 TREE_SIDE_EFFECTS (t) = 1;
2862 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2864 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2865 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2866 TREE_SIDE_EFFECTS (t) = 1;
2867 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2869 /* Find the overflow area. */
2870 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2872 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2873 build_int_2 (words * UNITS_PER_WORD, 0));
2874 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2875 TREE_SIDE_EFFECTS (t) = 1;
2876 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2878 /* Find the register save area.
2879 Prologue of the function save it right above stack frame. */
2880 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2881 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2882 TREE_SIDE_EFFECTS (t) = 1;
2883 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2886 /* Implement va_arg. */
2888 ix86_va_arg (valist, type)
2891 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2892 tree f_gpr, f_fpr, f_ovf, f_sav;
2893 tree gpr, fpr, ovf, sav, t;
2895 rtx lab_false, lab_over = NULL_RTX;
2900 /* Only 64bit target needs something special. */
2903 return std_expand_builtin_va_arg (valist, type);
2906 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2907 f_fpr = TREE_CHAIN (f_gpr);
2908 f_ovf = TREE_CHAIN (f_fpr);
2909 f_sav = TREE_CHAIN (f_ovf);
2911 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2912 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2913 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2914 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2915 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2917 size = int_size_in_bytes (type);
2920 /* Passed by reference. */
2922 type = build_pointer_type (type);
2923 size = int_size_in_bytes (type);
2925 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2927 container = construct_container (TYPE_MODE (type), type, 0,
2928 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2930 * Pull the value out of the saved registers ...
2933 addr_rtx = gen_reg_rtx (Pmode);
2937 rtx int_addr_rtx, sse_addr_rtx;
2938 int needed_intregs, needed_sseregs;
2941 lab_over = gen_label_rtx ();
2942 lab_false = gen_label_rtx ();
2944 examine_argument (TYPE_MODE (type), type, 0,
2945 &needed_intregs, &needed_sseregs);
2948 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2949 || TYPE_ALIGN (type) > 128);
2951 /* In case we are passing structure, verify that it is consecutive block
2952 on the register save area. If not we need to do moves. */
2953 if (!need_temp && !REG_P (container))
2955 /* Verify that all registers are strictly consecutive */
2956 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2960 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2962 rtx slot = XVECEXP (container, 0, i);
2963 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
2964 || INTVAL (XEXP (slot, 1)) != i * 16)
2972 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2974 rtx slot = XVECEXP (container, 0, i);
2975 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
2976 || INTVAL (XEXP (slot, 1)) != i * 8)
2983 int_addr_rtx = addr_rtx;
2984 sse_addr_rtx = addr_rtx;
2988 int_addr_rtx = gen_reg_rtx (Pmode);
2989 sse_addr_rtx = gen_reg_rtx (Pmode);
2991 /* First ensure that we fit completely in registers. */
2994 emit_cmp_and_jump_insns (expand_expr
2995 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2996 GEN_INT ((REGPARM_MAX - needed_intregs +
2997 1) * 8), GE, const1_rtx, SImode,
3002 emit_cmp_and_jump_insns (expand_expr
3003 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
3004 GEN_INT ((SSE_REGPARM_MAX -
3005 needed_sseregs + 1) * 16 +
3006 REGPARM_MAX * 8), GE, const1_rtx,
3007 SImode, 1, lab_false);
3010 /* Compute index to start of area used for integer regs. */
3013 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
3014 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
3015 if (r != int_addr_rtx)
3016 emit_move_insn (int_addr_rtx, r);
3020 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
3021 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
3022 if (r != sse_addr_rtx)
3023 emit_move_insn (sse_addr_rtx, r);
3030 /* Never use the memory itself, as it has the alias set. */
3031 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
3032 mem = gen_rtx_MEM (BLKmode, addr_rtx);
3033 set_mem_alias_set (mem, get_varargs_alias_set ());
3034 set_mem_align (mem, BITS_PER_UNIT);
3036 for (i = 0; i < XVECLEN (container, 0); i++)
3038 rtx slot = XVECEXP (container, 0, i);
3039 rtx reg = XEXP (slot, 0);
3040 enum machine_mode mode = GET_MODE (reg);
3046 if (SSE_REGNO_P (REGNO (reg)))
3048 src_addr = sse_addr_rtx;
3049 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3053 src_addr = int_addr_rtx;
3054 src_offset = REGNO (reg) * 8;
3056 src_mem = gen_rtx_MEM (mode, src_addr);
3057 set_mem_alias_set (src_mem, get_varargs_alias_set ());
3058 src_mem = adjust_address (src_mem, mode, src_offset);
3059 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
3060 emit_move_insn (dest_mem, src_mem);
3067 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3068 build_int_2 (needed_intregs * 8, 0));
3069 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3070 TREE_SIDE_EFFECTS (t) = 1;
3071 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3076 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3077 build_int_2 (needed_sseregs * 16, 0));
3078 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3079 TREE_SIDE_EFFECTS (t) = 1;
3080 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3083 emit_jump_insn (gen_jump (lab_over));
3085 emit_label (lab_false);
3088 /* ... otherwise out of the overflow area. */
3090 /* Care for on-stack alignment if needed. */
3091 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3095 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3096 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3097 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3101 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3103 emit_move_insn (addr_rtx, r);
3106 build (PLUS_EXPR, TREE_TYPE (t), t,
3107 build_int_2 (rsize * UNITS_PER_WORD, 0));
3108 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3109 TREE_SIDE_EFFECTS (t) = 1;
3110 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3113 emit_label (lab_over);
3117 r = gen_rtx_MEM (Pmode, addr_rtx);
3118 set_mem_alias_set (r, get_varargs_alias_set ());
3119 emit_move_insn (addr_rtx, r);
3125 /* Return nonzero if OP is either a i387 or SSE fp register. */
3127 any_fp_register_operand (op, mode)
3129 enum machine_mode mode ATTRIBUTE_UNUSED;
3131 return ANY_FP_REG_P (op);
3134 /* Return nonzero if OP is an i387 fp register. */
3136 fp_register_operand (op, mode)
3138 enum machine_mode mode ATTRIBUTE_UNUSED;
3140 return FP_REG_P (op);
3143 /* Return nonzero if OP is a non-fp register_operand. */
3145 register_and_not_any_fp_reg_operand (op, mode)
3147 enum machine_mode mode;
3149 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3152 /* Return nonzero if OP is a register operand other than an
3153 i387 fp register. */
3155 register_and_not_fp_reg_operand (op, mode)
3157 enum machine_mode mode;
3159 return register_operand (op, mode) && !FP_REG_P (op);
3162 /* Return nonzero if OP is general operand representable on x86_64. */
3165 x86_64_general_operand (op, mode)
3167 enum machine_mode mode;
3170 return general_operand (op, mode);
3171 if (nonimmediate_operand (op, mode))
3173 return x86_64_sign_extended_value (op);
3176 /* Return nonzero if OP is general operand representable on x86_64
3177 as either sign extended or zero extended constant. */
3180 x86_64_szext_general_operand (op, mode)
3182 enum machine_mode mode;
3185 return general_operand (op, mode);
3186 if (nonimmediate_operand (op, mode))
3188 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3191 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3194 x86_64_nonmemory_operand (op, mode)
3196 enum machine_mode mode;
3199 return nonmemory_operand (op, mode);
3200 if (register_operand (op, mode))
3202 return x86_64_sign_extended_value (op);
3205 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
3208 x86_64_movabs_operand (op, mode)
3210 enum machine_mode mode;
3212 if (!TARGET_64BIT || !flag_pic)
3213 return nonmemory_operand (op, mode);
3214 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3216 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3221 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3224 x86_64_szext_nonmemory_operand (op, mode)
3226 enum machine_mode mode;
3229 return nonmemory_operand (op, mode);
3230 if (register_operand (op, mode))
3232 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3235 /* Return nonzero if OP is immediate operand representable on x86_64. */
3238 x86_64_immediate_operand (op, mode)
3240 enum machine_mode mode;
3243 return immediate_operand (op, mode);
3244 return x86_64_sign_extended_value (op);
3247 /* Return nonzero if OP is immediate operand representable on x86_64. */
3250 x86_64_zext_immediate_operand (op, mode)
3252 enum machine_mode mode ATTRIBUTE_UNUSED;
3254 return x86_64_zero_extended_value (op);
3257 /* Return nonzero if OP is (const_int 1), else return zero. */
3260 const_int_1_operand (op, mode)
3262 enum machine_mode mode ATTRIBUTE_UNUSED;
3264 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
3267 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3268 for shift & compare patterns, as shifting by 0 does not change flags),
3269 else return zero. */
3272 const_int_1_31_operand (op, mode)
3274 enum machine_mode mode ATTRIBUTE_UNUSED;
3276 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3279 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3280 reference and a constant. */
3283 symbolic_operand (op, mode)
3285 enum machine_mode mode ATTRIBUTE_UNUSED;
3287 switch (GET_CODE (op))
3295 if (GET_CODE (op) == SYMBOL_REF
3296 || GET_CODE (op) == LABEL_REF
3297 || (GET_CODE (op) == UNSPEC
3298 && (XINT (op, 1) == UNSPEC_GOT
3299 || XINT (op, 1) == UNSPEC_GOTOFF
3300 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3302 if (GET_CODE (op) != PLUS
3303 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3307 if (GET_CODE (op) == SYMBOL_REF
3308 || GET_CODE (op) == LABEL_REF)
3310 /* Only @GOTOFF gets offsets. */
3311 if (GET_CODE (op) != UNSPEC
3312 || XINT (op, 1) != UNSPEC_GOTOFF)
3315 op = XVECEXP (op, 0, 0);
3316 if (GET_CODE (op) == SYMBOL_REF
3317 || GET_CODE (op) == LABEL_REF)
3326 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3329 pic_symbolic_operand (op, mode)
3331 enum machine_mode mode ATTRIBUTE_UNUSED;
3333 if (GET_CODE (op) != CONST)
3338 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
3343 if (GET_CODE (op) == UNSPEC)
3345 if (GET_CODE (op) != PLUS
3346 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3349 if (GET_CODE (op) == UNSPEC)
3355 /* Return true if OP is a symbolic operand that resolves locally. */
3358 local_symbolic_operand (op, mode)
3360 enum machine_mode mode ATTRIBUTE_UNUSED;
3362 if (GET_CODE (op) == CONST
3363 && GET_CODE (XEXP (op, 0)) == PLUS
3364 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3365 op = XEXP (XEXP (op, 0), 0);
3367 if (GET_CODE (op) == LABEL_REF)
3370 if (GET_CODE (op) != SYMBOL_REF)
3373 /* These we've been told are local by varasm and encode_section_info
3375 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
3378 /* There is, however, a not insubstantial body of code in the rest of
3379 the compiler that assumes it can just stick the results of
3380 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3381 /* ??? This is a hack. Should update the body of the compiler to
3382 always create a DECL an invoke targetm.encode_section_info. */
3383 if (strncmp (XSTR (op, 0), internal_label_prefix,
3384 internal_label_prefix_len) == 0)
3390 /* Test for various thread-local symbols. See ix86_encode_section_info. */
3393 tls_symbolic_operand (op, mode)
3395 enum machine_mode mode ATTRIBUTE_UNUSED;
3397 const char *symbol_str;
3399 if (GET_CODE (op) != SYMBOL_REF)
3401 symbol_str = XSTR (op, 0);
3403 if (symbol_str[0] != '%')
3405 return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
3409 tls_symbolic_operand_1 (op, kind)
3411 enum tls_model kind;
3413 const char *symbol_str;
3415 if (GET_CODE (op) != SYMBOL_REF)
3417 symbol_str = XSTR (op, 0);
3419 return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
3423 global_dynamic_symbolic_operand (op, mode)
3425 enum machine_mode mode ATTRIBUTE_UNUSED;
3427 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3431 local_dynamic_symbolic_operand (op, mode)
3433 enum machine_mode mode ATTRIBUTE_UNUSED;
3435 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3439 initial_exec_symbolic_operand (op, mode)
3441 enum machine_mode mode ATTRIBUTE_UNUSED;
3443 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3447 local_exec_symbolic_operand (op, mode)
3449 enum machine_mode mode ATTRIBUTE_UNUSED;
3451 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3454 /* Test for a valid operand for a call instruction. Don't allow the
3455 arg pointer register or virtual regs since they may decay into
3456 reg + const, which the patterns can't handle. */
3459 call_insn_operand (op, mode)
3461 enum machine_mode mode ATTRIBUTE_UNUSED;
3463 /* Disallow indirect through a virtual register. This leads to
3464 compiler aborts when trying to eliminate them. */
3465 if (GET_CODE (op) == REG
3466 && (op == arg_pointer_rtx
3467 || op == frame_pointer_rtx
3468 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3469 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3472 /* Disallow `call 1234'. Due to varying assembler lameness this
3473 gets either rejected or translated to `call .+1234'. */
3474 if (GET_CODE (op) == CONST_INT)
3477 /* Explicitly allow SYMBOL_REF even if pic. */
3478 if (GET_CODE (op) == SYMBOL_REF)
3481 /* Otherwise we can allow any general_operand in the address. */
3482 return general_operand (op, Pmode);
3485 /* Test for a valid operand for a call instruction. Don't allow the
3486 arg pointer register or virtual regs since they may decay into
3487 reg + const, which the patterns can't handle. */
3490 sibcall_insn_operand (op, mode)
3492 enum machine_mode mode ATTRIBUTE_UNUSED;
3494 /* Disallow indirect through a virtual register. This leads to
3495 compiler aborts when trying to eliminate them. */
3496 if (GET_CODE (op) == REG
3497 && (op == arg_pointer_rtx
3498 || op == frame_pointer_rtx
3499 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3500 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3503 /* Explicitly allow SYMBOL_REF even if pic. */
3504 if (GET_CODE (op) == SYMBOL_REF)
3507 /* Otherwise we can only allow register operands. */
3508 return register_operand (op, Pmode);
3512 constant_call_address_operand (op, mode)
3514 enum machine_mode mode ATTRIBUTE_UNUSED;
3516 if (GET_CODE (op) == CONST
3517 && GET_CODE (XEXP (op, 0)) == PLUS
3518 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3519 op = XEXP (XEXP (op, 0), 0);
3520 return GET_CODE (op) == SYMBOL_REF;
3523 /* Match exactly zero and one. */
3526 const0_operand (op, mode)
3528 enum machine_mode mode;
3530 return op == CONST0_RTX (mode);
3534 const1_operand (op, mode)
3536 enum machine_mode mode ATTRIBUTE_UNUSED;
3538 return op == const1_rtx;
3541 /* Match 2, 4, or 8. Used for leal multiplicands. */
3544 const248_operand (op, mode)
3546 enum machine_mode mode ATTRIBUTE_UNUSED;
3548 return (GET_CODE (op) == CONST_INT
3549 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3552 /* True if this is a constant appropriate for an increment or decrement. */
3555 incdec_operand (op, mode)
3557 enum machine_mode mode ATTRIBUTE_UNUSED;
3559 /* On Pentium4, the inc and dec operations causes extra dependency on flag
3560 registers, since carry flag is not set. */
3561 if (TARGET_PENTIUM4 && !optimize_size)
3563 return op == const1_rtx || op == constm1_rtx;
3566 /* Return nonzero if OP is acceptable as operand of DImode shift
3570 shiftdi_operand (op, mode)
3572 enum machine_mode mode ATTRIBUTE_UNUSED;
3575 return nonimmediate_operand (op, mode);
3577 return register_operand (op, mode);
3580 /* Return false if this is the stack pointer, or any other fake
3581 register eliminable to the stack pointer. Otherwise, this is
3584 This is used to prevent esp from being used as an index reg.
3585 Which would only happen in pathological cases. */
3588 reg_no_sp_operand (op, mode)
3590 enum machine_mode mode;
3593 if (GET_CODE (t) == SUBREG)
3595 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3598 return register_operand (op, mode);
3602 mmx_reg_operand (op, mode)
3604 enum machine_mode mode ATTRIBUTE_UNUSED;
3606 return MMX_REG_P (op);
3609 /* Return false if this is any eliminable register. Otherwise
3613 general_no_elim_operand (op, mode)
3615 enum machine_mode mode;
3618 if (GET_CODE (t) == SUBREG)
3620 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3621 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3622 || t == virtual_stack_dynamic_rtx)
3625 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3626 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3629 return general_operand (op, mode);
3632 /* Return false if this is any eliminable register. Otherwise
3633 register_operand or const_int. */
3636 nonmemory_no_elim_operand (op, mode)
3638 enum machine_mode mode;
3641 if (GET_CODE (t) == SUBREG)
3643 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3644 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3645 || t == virtual_stack_dynamic_rtx)
3648 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3651 /* Return false if this is any eliminable register or stack register,
3652 otherwise work like register_operand. */
3655 index_register_operand (op, mode)
3657 enum machine_mode mode;
3660 if (GET_CODE (t) == SUBREG)
3664 if (t == arg_pointer_rtx
3665 || t == frame_pointer_rtx
3666 || t == virtual_incoming_args_rtx
3667 || t == virtual_stack_vars_rtx
3668 || t == virtual_stack_dynamic_rtx
3669 || REGNO (t) == STACK_POINTER_REGNUM)
3672 return general_operand (op, mode);
3675 /* Return true if op is a Q_REGS class register. */
3678 q_regs_operand (op, mode)
3680 enum machine_mode mode;
3682 if (mode != VOIDmode && GET_MODE (op) != mode)
3684 if (GET_CODE (op) == SUBREG)
3685 op = SUBREG_REG (op);
3686 return ANY_QI_REG_P (op);
3689 /* Return true if op is an flags register. */
3692 flags_reg_operand (op, mode)
3694 enum machine_mode mode;
3696 if (mode != VOIDmode && GET_MODE (op) != mode)
3698 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3701 /* Return true if op is a NON_Q_REGS class register. */
3704 non_q_regs_operand (op, mode)
3706 enum machine_mode mode;
3708 if (mode != VOIDmode && GET_MODE (op) != mode)
3710 if (GET_CODE (op) == SUBREG)
3711 op = SUBREG_REG (op);
3712 return NON_QI_REG_P (op);
3716 zero_extended_scalar_load_operand (op, mode)
3718 enum machine_mode mode ATTRIBUTE_UNUSED;
3721 if (GET_CODE (op) != MEM)
3723 op = maybe_get_pool_constant (op);
3726 if (GET_CODE (op) != CONST_VECTOR)
3729 (GET_MODE_SIZE (GET_MODE (op)) /
3730 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
3731 for (n_elts--; n_elts > 0; n_elts--)
3733 rtx elt = CONST_VECTOR_ELT (op, n_elts);
3734 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
3740 /* Return 1 when OP is operand acceptable for standard SSE move. */
3742 vector_move_operand (op, mode)
3744 enum machine_mode mode;
3746 if (nonimmediate_operand (op, mode))
3748 if (GET_MODE (op) != mode && mode != VOIDmode)
3750 return (op == CONST0_RTX (GET_MODE (op)));
3753 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3756 sse_comparison_operator (op, mode)
3758 enum machine_mode mode ATTRIBUTE_UNUSED;
3760 enum rtx_code code = GET_CODE (op);
3763 /* Operations supported directly. */
3773 /* These are equivalent to ones above in non-IEEE comparisons. */
3780 return !TARGET_IEEE_FP;
3785 /* Return 1 if OP is a valid comparison operator in valid mode. */
3787 ix86_comparison_operator (op, mode)
3789 enum machine_mode mode;
3791 enum machine_mode inmode;
3792 enum rtx_code code = GET_CODE (op);
3793 if (mode != VOIDmode && GET_MODE (op) != mode)
3795 if (GET_RTX_CLASS (code) != '<')
3797 inmode = GET_MODE (XEXP (op, 0));
3799 if (inmode == CCFPmode || inmode == CCFPUmode)
3801 enum rtx_code second_code, bypass_code;
3802 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3803 return (bypass_code == NIL && second_code == NIL);
3810 if (inmode == CCmode || inmode == CCGCmode
3811 || inmode == CCGOCmode || inmode == CCNOmode)
3814 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3815 if (inmode == CCmode)
3819 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3827 /* Return 1 if OP is a valid comparison operator testing carry flag
3830 ix86_carry_flag_operator (op, mode)
3832 enum machine_mode mode;
3834 enum machine_mode inmode;
3835 enum rtx_code code = GET_CODE (op);
3837 if (mode != VOIDmode && GET_MODE (op) != mode)
3839 if (GET_RTX_CLASS (code) != '<')
3841 inmode = GET_MODE (XEXP (op, 0));
3842 if (GET_CODE (XEXP (op, 0)) != REG
3843 || REGNO (XEXP (op, 0)) != 17
3844 || XEXP (op, 1) != const0_rtx)
3847 if (inmode == CCFPmode || inmode == CCFPUmode)
3849 enum rtx_code second_code, bypass_code;
3851 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3852 if (bypass_code != NIL || second_code != NIL)
3854 code = ix86_fp_compare_code_to_integer (code);
3856 else if (inmode != CCmode)
3861 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3864 fcmov_comparison_operator (op, mode)
3866 enum machine_mode mode;
3868 enum machine_mode inmode;
3869 enum rtx_code code = GET_CODE (op);
3871 if (mode != VOIDmode && GET_MODE (op) != mode)
3873 if (GET_RTX_CLASS (code) != '<')
3875 inmode = GET_MODE (XEXP (op, 0));
3876 if (inmode == CCFPmode || inmode == CCFPUmode)
3878 enum rtx_code second_code, bypass_code;
3880 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3881 if (bypass_code != NIL || second_code != NIL)
3883 code = ix86_fp_compare_code_to_integer (code);
3885 /* i387 supports just limited amount of conditional codes. */
3888 case LTU: case GTU: case LEU: case GEU:
3889 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3892 case ORDERED: case UNORDERED:
3900 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3903 promotable_binary_operator (op, mode)
3905 enum machine_mode mode ATTRIBUTE_UNUSED;
3907 switch (GET_CODE (op))
3910 /* Modern CPUs have same latency for HImode and SImode multiply,
3911 but 386 and 486 do HImode multiply faster. */
3912 return ix86_cpu > PROCESSOR_I486;
3924 /* Nearly general operand, but accept any const_double, since we wish
3925 to be able to drop them into memory rather than have them get pulled
3929 cmp_fp_expander_operand (op, mode)
3931 enum machine_mode mode;
3933 if (mode != VOIDmode && mode != GET_MODE (op))
3935 if (GET_CODE (op) == CONST_DOUBLE)
3937 return general_operand (op, mode);
3940 /* Match an SI or HImode register for a zero_extract. */
3943 ext_register_operand (op, mode)
3945 enum machine_mode mode ATTRIBUTE_UNUSED;
3948 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3949 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3952 if (!register_operand (op, VOIDmode))
3955 /* Be careful to accept only registers having upper parts. */
3956 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3957 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3960 /* Return 1 if this is a valid binary floating-point operation.
3961 OP is the expression matched, and MODE is its mode. */
3964 binary_fp_operator (op, mode)
3966 enum machine_mode mode;
3968 if (mode != VOIDmode && mode != GET_MODE (op))
3971 switch (GET_CODE (op))
3977 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
3985 mult_operator (op, mode)
3987 enum machine_mode mode ATTRIBUTE_UNUSED;
3989 return GET_CODE (op) == MULT;
3993 div_operator (op, mode)
3995 enum machine_mode mode ATTRIBUTE_UNUSED;
3997 return GET_CODE (op) == DIV;
4001 arith_or_logical_operator (op, mode)
4003 enum machine_mode mode;
4005 return ((mode == VOIDmode || GET_MODE (op) == mode)
4006 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
4007 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
4010 /* Returns 1 if OP is memory operand with a displacement. */
4013 memory_displacement_operand (op, mode)
4015 enum machine_mode mode;
4017 struct ix86_address parts;
4019 if (! memory_operand (op, mode))
4022 if (! ix86_decompose_address (XEXP (op, 0), &parts))
4025 return parts.disp != NULL_RTX;
4028 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
4029 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4031 ??? It seems likely that this will only work because cmpsi is an
4032 expander, and no actual insns use this. */
4035 cmpsi_operand (op, mode)
4037 enum machine_mode mode;
4039 if (nonimmediate_operand (op, mode))
4042 if (GET_CODE (op) == AND
4043 && GET_MODE (op) == SImode
4044 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
4045 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
4046 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
4047 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
4048 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
4049 && GET_CODE (XEXP (op, 1)) == CONST_INT)
4055 /* Returns 1 if OP is memory operand that can not be represented by the
4059 long_memory_operand (op, mode)
4061 enum machine_mode mode;
4063 if (! memory_operand (op, mode))
4066 return memory_address_length (op) != 0;
4069 /* Return nonzero if the rtx is known aligned. */
4072 aligned_operand (op, mode)
4074 enum machine_mode mode;
4076 struct ix86_address parts;
4078 if (!general_operand (op, mode))
4081 /* Registers and immediate operands are always "aligned". */
4082 if (GET_CODE (op) != MEM)
4085 /* Don't even try to do any aligned optimizations with volatiles. */
4086 if (MEM_VOLATILE_P (op))
4091 /* Pushes and pops are only valid on the stack pointer. */
4092 if (GET_CODE (op) == PRE_DEC
4093 || GET_CODE (op) == POST_INC)
4096 /* Decode the address. */
4097 if (! ix86_decompose_address (op, &parts))
4100 if (parts.base && GET_CODE (parts.base) == SUBREG)
4101 parts.base = SUBREG_REG (parts.base);
4102 if (parts.index && GET_CODE (parts.index) == SUBREG)
4103 parts.index = SUBREG_REG (parts.index);
4105 /* Look for some component that isn't known to be aligned. */
4109 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
4114 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
4119 if (GET_CODE (parts.disp) != CONST_INT
4120 || (INTVAL (parts.disp) & 3) != 0)
4124 /* Didn't find one -- this must be an aligned address. */
4128 /* Return true if the constant is something that can be loaded with
4129 a special instruction. Only handle 0.0 and 1.0; others are less
4133 standard_80387_constant_p (x)
4136 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4138 /* Note that on the 80387, other constants, such as pi, that we should support
4139 too. On some machines, these are much slower to load as standard constant,
4140 than to load from doubles in memory. */
4141 if (x == CONST0_RTX (GET_MODE (x)))
4143 if (x == CONST1_RTX (GET_MODE (x)))
4148 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4151 standard_sse_constant_p (x)
4154 if (x == const0_rtx)
4156 return (x == CONST0_RTX (GET_MODE (x)));
4159 /* Returns 1 if OP contains a symbol reference */
4162 symbolic_reference_mentioned_p (op)
4165 register const char *fmt;
4168 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4171 fmt = GET_RTX_FORMAT (GET_CODE (op));
4172 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4178 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4179 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4183 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4190 /* Return 1 if it is appropriate to emit `ret' instructions in the
4191 body of a function. Do this only if the epilogue is simple, needing a
4192 couple of insns. Prior to reloading, we can't tell how many registers
4193 must be saved, so return 0 then. Return 0 if there is no frame
4194 marker to de-allocate.
4196 If NON_SAVING_SETJMP is defined and true, then it is not possible
4197 for the epilogue to be simple, so return 0. This is a special case
4198 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4199 until final, but jump_optimize may need to know sooner if a
4203 ix86_can_use_return_insn_p ()
4205 struct ix86_frame frame;
4207 #ifdef NON_SAVING_SETJMP
4208 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4212 if (! reload_completed || frame_pointer_needed)
4215 /* Don't allow more than 32 pop, since that's all we can do
4216 with one instruction. */
4217 if (current_function_pops_args
4218 && current_function_args_size >= 32768)
4221 ix86_compute_frame_layout (&frame);
4222 return frame.to_allocate == 0 && frame.nregs == 0;
4225 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
4227 x86_64_sign_extended_value (value)
4230 switch (GET_CODE (value))
4232 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
4233 to be at least 32 and this all acceptable constants are
4234 represented as CONST_INT. */
4236 if (HOST_BITS_PER_WIDE_INT == 32)
4240 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4241 return trunc_int_for_mode (val, SImode) == val;
4245 /* For certain code models, the symbolic references are known to fit.
4246 in CM_SMALL_PIC model we know it fits if it is local to the shared
4247 library. Don't count TLS SYMBOL_REFs here, since they should fit
4248 only if inside of UNSPEC handled below. */
4250 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4252 /* For certain code models, the code is near as well. */
4254 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4255 || ix86_cmodel == CM_KERNEL);
4257 /* We also may accept the offsetted memory references in certain special
4260 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4261 switch (XINT (XEXP (value, 0), 1))
4263 case UNSPEC_GOTPCREL:
4265 case UNSPEC_GOTNTPOFF:
4271 if (GET_CODE (XEXP (value, 0)) == PLUS)
4273 rtx op1 = XEXP (XEXP (value, 0), 0);
4274 rtx op2 = XEXP (XEXP (value, 0), 1);
4275 HOST_WIDE_INT offset;
4277 if (ix86_cmodel == CM_LARGE)
4279 if (GET_CODE (op2) != CONST_INT)
4281 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4282 switch (GET_CODE (op1))
4285 /* For CM_SMALL assume that latest object is 16MB before
4286 end of 31bits boundary. We may also accept pretty
4287 large negative constants knowing that all objects are
4288 in the positive half of address space. */
4289 if (ix86_cmodel == CM_SMALL
4290 && offset < 16*1024*1024
4291 && trunc_int_for_mode (offset, SImode) == offset)
4293 /* For CM_KERNEL we know that all object resist in the
4294 negative half of 32bits address space. We may not
4295 accept negative offsets, since they may be just off
4296 and we may accept pretty large positive ones. */
4297 if (ix86_cmodel == CM_KERNEL
4299 && trunc_int_for_mode (offset, SImode) == offset)
4303 /* These conditions are similar to SYMBOL_REF ones, just the
4304 constraints for code models differ. */
4305 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4306 && offset < 16*1024*1024
4307 && trunc_int_for_mode (offset, SImode) == offset)
4309 if (ix86_cmodel == CM_KERNEL
4311 && trunc_int_for_mode (offset, SImode) == offset)
4315 switch (XINT (op1, 1))
4320 && trunc_int_for_mode (offset, SImode) == offset)
4334 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
4336 x86_64_zero_extended_value (value)
4339 switch (GET_CODE (value))
4342 if (HOST_BITS_PER_WIDE_INT == 32)
4343 return (GET_MODE (value) == VOIDmode
4344 && !CONST_DOUBLE_HIGH (value));
4348 if (HOST_BITS_PER_WIDE_INT == 32)
4349 return INTVAL (value) >= 0;
4351 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4354 /* For certain code models, the symbolic references are known to fit. */
4356 return ix86_cmodel == CM_SMALL;
4358 /* For certain code models, the code is near as well. */
4360 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4362 /* We also may accept the offsetted memory references in certain special
4365 if (GET_CODE (XEXP (value, 0)) == PLUS)
4367 rtx op1 = XEXP (XEXP (value, 0), 0);
4368 rtx op2 = XEXP (XEXP (value, 0), 1);
4370 if (ix86_cmodel == CM_LARGE)
4372 switch (GET_CODE (op1))
4376 /* For small code model we may accept pretty large positive
4377 offsets, since one bit is available for free. Negative
4378 offsets are limited by the size of NULL pointer area
4379 specified by the ABI. */
4380 if (ix86_cmodel == CM_SMALL
4381 && GET_CODE (op2) == CONST_INT
4382 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4383 && (trunc_int_for_mode (INTVAL (op2), SImode)
4386 /* ??? For the kernel, we may accept adjustment of
4387 -0x10000000, since we know that it will just convert
4388 negative address space to positive, but perhaps this
4389 is not worthwhile. */
4392 /* These conditions are similar to SYMBOL_REF ones, just the
4393 constraints for code models differ. */
4394 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4395 && GET_CODE (op2) == CONST_INT
4396 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4397 && (trunc_int_for_mode (INTVAL (op2), SImode)
4411 /* Value should be nonzero if functions must have frame pointers.
4412 Zero means the frame pointer need not be set up (and parms may
4413 be accessed via the stack pointer) in functions that seem suitable. */
4416 ix86_frame_pointer_required ()
4418 /* If we accessed previous frames, then the generated code expects
4419 to be able to access the saved ebp value in our frame. */
4420 if (cfun->machine->accesses_prev_frame)
4423 /* Several x86 os'es need a frame pointer for other reasons,
4424 usually pertaining to setjmp. */
4425 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4428 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4429 the frame pointer by default. Turn it back on now if we've not
4430 got a leaf function. */
4431 if (TARGET_OMIT_LEAF_FRAME_POINTER
4432 && (!current_function_is_leaf))
4435 if (current_function_profile)
4441 /* Record that the current function accesses previous call frames. */
4444 ix86_setup_frame_addresses ()
4446 cfun->machine->accesses_prev_frame = 1;
4449 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4450 # define USE_HIDDEN_LINKONCE 1
4452 # define USE_HIDDEN_LINKONCE 0
4455 static int pic_labels_used;
4457 /* Fills in the label name that should be used for a pc thunk for
4458 the given register. */
4461 get_pc_thunk_name (name, regno)
4465 if (USE_HIDDEN_LINKONCE)
4466 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4468 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4472 /* This function generates code for -fpic that loads %ebx with
4473 the return address of the caller and then returns. */
4476 ix86_asm_file_end (file)
4482 for (regno = 0; regno < 8; ++regno)
4486 if (! ((pic_labels_used >> regno) & 1))
4489 get_pc_thunk_name (name, regno);
4491 if (USE_HIDDEN_LINKONCE)
4495 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4497 TREE_PUBLIC (decl) = 1;
4498 TREE_STATIC (decl) = 1;
4499 DECL_ONE_ONLY (decl) = 1;
4501 (*targetm.asm_out.unique_section) (decl, 0);
4502 named_section (decl, NULL, 0);
4504 (*targetm.asm_out.globalize_label) (file, name);
4505 fputs ("\t.hidden\t", file);
4506 assemble_name (file, name);
4508 ASM_DECLARE_FUNCTION_NAME (file, name, decl);
4513 ASM_OUTPUT_LABEL (file, name);
4516 xops[0] = gen_rtx_REG (SImode, regno);
4517 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4518 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4519 output_asm_insn ("ret", xops);
4523 /* Emit code for the SET_GOT patterns. */
4526 output_set_got (dest)
4532 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4534 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4536 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4539 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4541 output_asm_insn ("call\t%a2", xops);
4544 /* Output the "canonical" label name ("Lxx$pb") here too. This
4545 is what will be referred to by the Mach-O PIC subsystem. */
4546 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4548 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4549 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4552 output_asm_insn ("pop{l}\t%0", xops);
4557 get_pc_thunk_name (name, REGNO (dest));
4558 pic_labels_used |= 1 << REGNO (dest);
4560 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4561 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4562 output_asm_insn ("call\t%X2", xops);
4565 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4566 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4567 else if (!TARGET_MACHO)
4568 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4573 /* Generate an "push" pattern for input ARG. */
4579 return gen_rtx_SET (VOIDmode,
4581 gen_rtx_PRE_DEC (Pmode,
4582 stack_pointer_rtx)),
4586 /* Return >= 0 if there is an unused call-clobbered register available
4587 for the entire function. */
4590 ix86_select_alt_pic_regnum ()
4592 if (current_function_is_leaf && !current_function_profile)
4595 for (i = 2; i >= 0; --i)
4596 if (!regs_ever_live[i])
4600 return INVALID_REGNUM;
4603 /* Return 1 if we need to save REGNO. */
4605 ix86_save_reg (regno, maybe_eh_return)
4607 int maybe_eh_return;
4609 if (pic_offset_table_rtx
4610 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4611 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4612 || current_function_profile
4613 || current_function_calls_eh_return))
4615 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4620 if (current_function_calls_eh_return && maybe_eh_return)
4625 unsigned test = EH_RETURN_DATA_REGNO (i);
4626 if (test == INVALID_REGNUM)
4633 return (regs_ever_live[regno]
4634 && !call_used_regs[regno]
4635 && !fixed_regs[regno]
4636 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4639 /* Return number of registers to be saved on the stack. */
4647 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4648 if (ix86_save_reg (regno, true))
4653 /* Return the offset between two registers, one to be eliminated, and the other
4654 its replacement, at the start of a routine. */
4657 ix86_initial_elimination_offset (from, to)
4661 struct ix86_frame frame;
4662 ix86_compute_frame_layout (&frame);
4664 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4665 return frame.hard_frame_pointer_offset;
4666 else if (from == FRAME_POINTER_REGNUM
4667 && to == HARD_FRAME_POINTER_REGNUM)
4668 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4671 if (to != STACK_POINTER_REGNUM)
4673 else if (from == ARG_POINTER_REGNUM)
4674 return frame.stack_pointer_offset;
4675 else if (from != FRAME_POINTER_REGNUM)
4678 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4682 /* Fill structure ix86_frame about frame of currently computed function. */
4685 ix86_compute_frame_layout (frame)
4686 struct ix86_frame *frame;
4688 HOST_WIDE_INT total_size;
4689 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4691 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4692 HOST_WIDE_INT size = get_frame_size ();
4694 frame->nregs = ix86_nsaved_regs ();
4697 /* Skip return address and saved base pointer. */
4698 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4700 frame->hard_frame_pointer_offset = offset;
4702 /* Do some sanity checking of stack_alignment_needed and
4703 preferred_alignment, since i386 port is the only using those features
4704 that may break easily. */
4706 if (size && !stack_alignment_needed)
4708 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4710 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4712 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4715 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4716 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4718 /* Register save area */
4719 offset += frame->nregs * UNITS_PER_WORD;
4722 if (ix86_save_varrargs_registers)
4724 offset += X86_64_VARARGS_SIZE;
4725 frame->va_arg_size = X86_64_VARARGS_SIZE;
4728 frame->va_arg_size = 0;
4730 /* Align start of frame for local function. */
4731 frame->padding1 = ((offset + stack_alignment_needed - 1)
4732 & -stack_alignment_needed) - offset;
4734 offset += frame->padding1;
4736 /* Frame pointer points here. */
4737 frame->frame_pointer_offset = offset;
4741 /* Add outgoing arguments area. Can be skipped if we eliminated
4742 all the function calls as dead code. */
4743 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4745 offset += current_function_outgoing_args_size;
4746 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4749 frame->outgoing_arguments_size = 0;
4751 /* Align stack boundary. Only needed if we're calling another function
4753 if (!current_function_is_leaf || current_function_calls_alloca)
4754 frame->padding2 = ((offset + preferred_alignment - 1)
4755 & -preferred_alignment) - offset;
4757 frame->padding2 = 0;
4759 offset += frame->padding2;
4761 /* We've reached end of stack frame. */
4762 frame->stack_pointer_offset = offset;
4764 /* Size prologue needs to allocate. */
4765 frame->to_allocate =
4766 (size + frame->padding1 + frame->padding2
4767 + frame->outgoing_arguments_size + frame->va_arg_size);
4769 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4770 && current_function_is_leaf)
4772 frame->red_zone_size = frame->to_allocate;
4773 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4774 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4777 frame->red_zone_size = 0;
4778 frame->to_allocate -= frame->red_zone_size;
4779 frame->stack_pointer_offset -= frame->red_zone_size;
4781 fprintf (stderr, "nregs: %i\n", frame->nregs);
4782 fprintf (stderr, "size: %i\n", size);
4783 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4784 fprintf (stderr, "padding1: %i\n", frame->padding1);
4785 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4786 fprintf (stderr, "padding2: %i\n", frame->padding2);
4787 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4788 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4789 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4790 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4791 frame->hard_frame_pointer_offset);
4792 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4796 /* Emit code to save registers in the prologue. */
4799 ix86_emit_save_regs ()
4804 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4805 if (ix86_save_reg (regno, true))
4807 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4808 RTX_FRAME_RELATED_P (insn) = 1;
4812 /* Emit code to save registers using MOV insns. First register
4813 is restored from POINTER + OFFSET. */
4815 ix86_emit_save_regs_using_mov (pointer, offset)
4817 HOST_WIDE_INT offset;
4822 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4823 if (ix86_save_reg (regno, true))
4825 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4827 gen_rtx_REG (Pmode, regno));
4828 RTX_FRAME_RELATED_P (insn) = 1;
4829 offset += UNITS_PER_WORD;
4833 /* Expand the prologue into a bunch of separate insns. */
4836 ix86_expand_prologue ()
4840 struct ix86_frame frame;
4842 HOST_WIDE_INT allocate;
4844 ix86_compute_frame_layout (&frame);
4847 int count = frame.nregs;
4849 /* The fast prologue uses move instead of push to save registers. This
4850 is significantly longer, but also executes faster as modern hardware
4851 can execute the moves in parallel, but can't do that for push/pop.
4853 Be careful about choosing what prologue to emit: When function takes
4854 many instructions to execute we may use slow version as well as in
4855 case function is known to be outside hot spot (this is known with
4856 feedback only). Weight the size of function by number of registers
4857 to save as it is cheap to use one or two push instructions but very
4858 slow to use many of them. */
4860 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4861 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4862 || (flag_branch_probabilities
4863 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4864 use_fast_prologue_epilogue = 0;
4866 use_fast_prologue_epilogue = !expensive_function_p (count);
4867 if (TARGET_PROLOGUE_USING_MOVE)
4868 use_mov = use_fast_prologue_epilogue;
4871 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4872 slower on all targets. Also sdb doesn't like it. */
4874 if (frame_pointer_needed)
4876 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4877 RTX_FRAME_RELATED_P (insn) = 1;
4879 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4880 RTX_FRAME_RELATED_P (insn) = 1;
4883 allocate = frame.to_allocate;
4884 /* In case we are dealing only with single register and empty frame,
4885 push is equivalent of the mov+add sequence. */
4886 if (allocate == 0 && frame.nregs <= 1)
4890 ix86_emit_save_regs ();
4892 allocate += frame.nregs * UNITS_PER_WORD;
4896 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4898 insn = emit_insn (gen_pro_epilogue_adjust_stack
4899 (stack_pointer_rtx, stack_pointer_rtx,
4900 GEN_INT (-allocate)));
4901 RTX_FRAME_RELATED_P (insn) = 1;
4905 /* ??? Is this only valid for Win32? */
4912 arg0 = gen_rtx_REG (SImode, 0);
4913 emit_move_insn (arg0, GEN_INT (allocate));
4915 sym = gen_rtx_MEM (FUNCTION_MODE,
4916 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
4917 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
4919 CALL_INSN_FUNCTION_USAGE (insn)
4920 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4921 CALL_INSN_FUNCTION_USAGE (insn));
4923 /* Don't allow scheduling pass to move insns across __alloca
4925 emit_insn (gen_blockage (const0_rtx));
4929 if (!frame_pointer_needed || !frame.to_allocate)
4930 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4932 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4933 -frame.nregs * UNITS_PER_WORD);
4936 #ifdef SUBTARGET_PROLOGUE
4940 pic_reg_used = false;
4941 if (pic_offset_table_rtx
4942 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4943 || current_function_profile))
4945 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4947 if (alt_pic_reg_used != INVALID_REGNUM)
4948 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4950 pic_reg_used = true;
4955 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4957 /* Even with accurate pre-reload life analysis, we can wind up
4958 deleting all references to the pic register after reload.
4959 Consider if cross-jumping unifies two sides of a branch
4960 controlled by a comparison vs the only read from a global.
4961 In which case, allow the set_got to be deleted, though we're
4962 too late to do anything about the ebx save in the prologue. */
4963 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4966 /* Prevent function calls from be scheduled before the call to mcount.
4967 In the pic_reg_used case, make sure that the got load isn't deleted. */
4968 if (current_function_profile)
4969 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
4972 /* Emit code to restore saved registers using MOV insns. First register
4973 is restored from POINTER + OFFSET. */
4975 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4978 int maybe_eh_return;
4982 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4983 if (ix86_save_reg (regno, maybe_eh_return))
4985 emit_move_insn (gen_rtx_REG (Pmode, regno),
4986 adjust_address (gen_rtx_MEM (Pmode, pointer),
4988 offset += UNITS_PER_WORD;
4992 /* Restore function stack, frame, and registers. */
4995 ix86_expand_epilogue (style)
4999 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5000 struct ix86_frame frame;
5001 HOST_WIDE_INT offset;
5003 ix86_compute_frame_layout (&frame);
5005 /* Calculate start of saved registers relative to ebp. Special care
5006 must be taken for the normal return case of a function using
5007 eh_return: the eax and edx registers are marked as saved, but not
5008 restored along this path. */
5009 offset = frame.nregs;
5010 if (current_function_calls_eh_return && style != 2)
5012 offset *= -UNITS_PER_WORD;
5014 /* If we're only restoring one register and sp is not valid then
5015 using a move instruction to restore the register since it's
5016 less work than reloading sp and popping the register.
5018 The default code result in stack adjustment using add/lea instruction,
5019 while this code results in LEAVE instruction (or discrete equivalent),
5020 so it is profitable in some other cases as well. Especially when there
5021 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5022 and there is exactly one register to pop. This heuristic may need some
5023 tuning in future. */
5024 if ((!sp_valid && frame.nregs <= 1)
5025 || (TARGET_EPILOGUE_USING_MOVE
5026 && use_fast_prologue_epilogue
5027 && (frame.nregs > 1 || frame.to_allocate))
5028 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5029 || (frame_pointer_needed && TARGET_USE_LEAVE
5030 && use_fast_prologue_epilogue && frame.nregs == 1)
5031 || current_function_calls_eh_return)
5033 /* Restore registers. We can use ebp or esp to address the memory
5034 locations. If both are available, default to ebp, since offsets
5035 are known to be small. Only exception is esp pointing directly to the
5036 end of block of saved registers, where we may simplify addressing
5039 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5040 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5041 frame.to_allocate, style == 2);
5043 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5044 offset, style == 2);
5046 /* eh_return epilogues need %ecx added to the stack pointer. */
5049 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5051 if (frame_pointer_needed)
5053 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5054 tmp = plus_constant (tmp, UNITS_PER_WORD);
5055 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5057 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5058 emit_move_insn (hard_frame_pointer_rtx, tmp);
5060 emit_insn (gen_pro_epilogue_adjust_stack
5061 (stack_pointer_rtx, sa, const0_rtx));
5065 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5066 tmp = plus_constant (tmp, (frame.to_allocate
5067 + frame.nregs * UNITS_PER_WORD));
5068 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5071 else if (!frame_pointer_needed)
5072 emit_insn (gen_pro_epilogue_adjust_stack
5073 (stack_pointer_rtx, stack_pointer_rtx,
5074 GEN_INT (frame.to_allocate
5075 + frame.nregs * UNITS_PER_WORD)));
5076 /* If not an i386, mov & pop is faster than "leave". */
5077 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
5078 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5081 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
5082 hard_frame_pointer_rtx,
5085 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5087 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5092 /* First step is to deallocate the stack frame so that we can
5093 pop the registers. */
5096 if (!frame_pointer_needed)
5098 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
5099 hard_frame_pointer_rtx,
5102 else if (frame.to_allocate)
5103 emit_insn (gen_pro_epilogue_adjust_stack
5104 (stack_pointer_rtx, stack_pointer_rtx,
5105 GEN_INT (frame.to_allocate)));
5107 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5108 if (ix86_save_reg (regno, false))
5111 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5113 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5115 if (frame_pointer_needed)
5117 /* Leave results in shorter dependency chains on CPUs that are
5118 able to grok it fast. */
5119 if (TARGET_USE_LEAVE)
5120 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5121 else if (TARGET_64BIT)
5122 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5124 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5128 /* Sibcall epilogues don't want a return instruction. */
5132 if (current_function_pops_args && current_function_args_size)
5134 rtx popc = GEN_INT (current_function_pops_args);
5136 /* i386 can only pop 64K bytes. If asked to pop more, pop
5137 return address, do explicit add, and jump indirectly to the
5140 if (current_function_pops_args >= 65536)
5142 rtx ecx = gen_rtx_REG (SImode, 2);
5144 /* There are is no "pascal" calling convention in 64bit ABI. */
5148 emit_insn (gen_popsi1 (ecx));
5149 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5150 emit_jump_insn (gen_return_indirect_internal (ecx));
5153 emit_jump_insn (gen_return_pop_internal (popc));
5156 emit_jump_insn (gen_return_internal ());
5159 /* Reset from the function's potential modifications. */
5162 ix86_output_function_epilogue (file, size)
5163 FILE *file ATTRIBUTE_UNUSED;
5164 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
5166 if (pic_offset_table_rtx)
5167 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5170 /* Extract the parts of an RTL expression that is a valid memory address
5171 for an instruction. Return 0 if the structure of the address is
5172 grossly off. Return -1 if the address contains ASHIFT, so it is not
5173 strictly valid, but still used for computing length of lea instruction.
5177 ix86_decompose_address (addr, out)
5179 struct ix86_address *out;
5181 rtx base = NULL_RTX;
5182 rtx index = NULL_RTX;
5183 rtx disp = NULL_RTX;
5184 HOST_WIDE_INT scale = 1;
5185 rtx scale_rtx = NULL_RTX;
5188 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
5190 else if (GET_CODE (addr) == PLUS)
5192 rtx op0 = XEXP (addr, 0);
5193 rtx op1 = XEXP (addr, 1);
5194 enum rtx_code code0 = GET_CODE (op0);
5195 enum rtx_code code1 = GET_CODE (op1);
5197 if (code0 == REG || code0 == SUBREG)
5199 if (code1 == REG || code1 == SUBREG)
5200 index = op0, base = op1; /* index + base */
5202 base = op0, disp = op1; /* base + displacement */
5204 else if (code0 == MULT)
5206 index = XEXP (op0, 0);
5207 scale_rtx = XEXP (op0, 1);
5208 if (code1 == REG || code1 == SUBREG)
5209 base = op1; /* index*scale + base */
5211 disp = op1; /* index*scale + disp */
5213 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
5215 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
5216 scale_rtx = XEXP (XEXP (op0, 0), 1);
5217 base = XEXP (op0, 1);
5220 else if (code0 == PLUS)
5222 index = XEXP (op0, 0); /* index + base + disp */
5223 base = XEXP (op0, 1);
5229 else if (GET_CODE (addr) == MULT)
5231 index = XEXP (addr, 0); /* index*scale */
5232 scale_rtx = XEXP (addr, 1);
5234 else if (GET_CODE (addr) == ASHIFT)
5238 /* We're called for lea too, which implements ashift on occasion. */
5239 index = XEXP (addr, 0);
5240 tmp = XEXP (addr, 1);
5241 if (GET_CODE (tmp) != CONST_INT)
5243 scale = INTVAL (tmp);
5244 if ((unsigned HOST_WIDE_INT) scale > 3)
5250 disp = addr; /* displacement */
5252 /* Extract the integral value of scale. */
5255 if (GET_CODE (scale_rtx) != CONST_INT)
5257 scale = INTVAL (scale_rtx);
5260 /* Allow arg pointer and stack pointer as index if there is not scaling */
5261 if (base && index && scale == 1
5262 && (index == arg_pointer_rtx || index == frame_pointer_rtx
5263 || index == stack_pointer_rtx))
5270 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5271 if ((base == hard_frame_pointer_rtx
5272 || base == frame_pointer_rtx
5273 || base == arg_pointer_rtx) && !disp)
5276 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5277 Avoid this by transforming to [%esi+0]. */
5278 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
5279 && base && !index && !disp
5281 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5284 /* Special case: encode reg+reg instead of reg*2. */
5285 if (!base && index && scale && scale == 2)
5286 base = index, scale = 1;
5288 /* Special case: scaling cannot be encoded without base or displacement. */
5289 if (!base && !disp && index && scale != 1)
5300 /* Return cost of the memory address x.
5301 For i386, it is better to use a complex address than let gcc copy
5302 the address into a reg and make a new pseudo. But not if the address
5303 requires to two regs - that would mean more pseudos with longer
5306 ix86_address_cost (x)
5309 struct ix86_address parts;
5312 if (!ix86_decompose_address (x, &parts))
5315 if (parts.base && GET_CODE (parts.base) == SUBREG)
5316 parts.base = SUBREG_REG (parts.base);
5317 if (parts.index && GET_CODE (parts.index) == SUBREG)
5318 parts.index = SUBREG_REG (parts.index);
5320 /* More complex memory references are better. */
5321 if (parts.disp && parts.disp != const0_rtx)
5324 /* Attempt to minimize number of registers in the address. */
5326 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5328 && (!REG_P (parts.index)
5329 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5333 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5335 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5336 && parts.base != parts.index)
5339 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
5340 since it's predecode logic can't detect the length of instructions
5341 and it degenerates to vector decoded. Increase cost of such
5342 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5343 to split such addresses or even refuse such addresses at all.
5345 Following addressing modes are affected:
5350 The first and last case may be avoidable by explicitly coding the zero in
5351 memory address, but I don't have AMD-K6 machine handy to check this
5355 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5356 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5357 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5363 /* If X is a machine specific address (i.e. a symbol or label being
5364 referenced as a displacement from the GOT implemented using an
5365 UNSPEC), then return the base term. Otherwise return X. */
5368 ix86_find_base_term (x)
5375 if (GET_CODE (x) != CONST)
5378 if (GET_CODE (term) == PLUS
5379 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5380 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5381 term = XEXP (term, 0);
5382 if (GET_CODE (term) != UNSPEC
5383 || XINT (term, 1) != UNSPEC_GOTPCREL)
5386 term = XVECEXP (term, 0, 0);
5388 if (GET_CODE (term) != SYMBOL_REF
5389 && GET_CODE (term) != LABEL_REF)
5395 term = ix86_delegitimize_address (x);
5397 if (GET_CODE (term) != SYMBOL_REF
5398 && GET_CODE (term) != LABEL_REF)
5404 /* Determine if a given RTX is a valid constant. We already know this
5405 satisfies CONSTANT_P. */
5408 legitimate_constant_p (x)
5413 switch (GET_CODE (x))
5416 /* TLS symbols are not constant. */
5417 if (tls_symbolic_operand (x, Pmode))
5422 inner = XEXP (x, 0);
5424 /* Offsets of TLS symbols are never valid.
5425 Discourage CSE from creating them. */
5426 if (GET_CODE (inner) == PLUS
5427 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5430 /* Only some unspecs are valid as "constants". */
5431 if (GET_CODE (inner) == UNSPEC)
5432 switch (XINT (inner, 1))
5435 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5445 /* Otherwise we handle everything else in the move patterns. */
5449 /* Determine if it's legal to put X into the constant pool. This
5450 is not possible for the address of thread-local symbols, which
5451 is checked above. */
5454 ix86_cannot_force_const_mem (x)
5457 return !legitimate_constant_p (x);
5460 /* Determine if a given RTX is a valid constant address. */
5463 constant_address_p (x)
5466 switch (GET_CODE (x))
5473 return TARGET_64BIT;
5476 /* For Mach-O, really believe the CONST. */
5479 /* Otherwise fall through. */
5481 return !flag_pic && legitimate_constant_p (x);
5488 /* Nonzero if the constant value X is a legitimate general operand
5489 when generating PIC code. It is given that flag_pic is on and
5490 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5493 legitimate_pic_operand_p (x)
5498 switch (GET_CODE (x))
5501 inner = XEXP (x, 0);
5503 /* Only some unspecs are valid as "constants". */
5504 if (GET_CODE (inner) == UNSPEC)
5505 switch (XINT (inner, 1))
5508 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5516 return legitimate_pic_address_disp_p (x);
5523 /* Determine if a given CONST RTX is a valid memory displacement
5527 legitimate_pic_address_disp_p (disp)
5532 /* In 64bit mode we can allow direct addresses of symbols and labels
5533 when they are not dynamic symbols. */
5536 /* TLS references should always be enclosed in UNSPEC. */
5537 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5539 if (GET_CODE (disp) == SYMBOL_REF
5540 && ix86_cmodel == CM_SMALL_PIC
5541 && (CONSTANT_POOL_ADDRESS_P (disp)
5542 || SYMBOL_REF_FLAG (disp)))
5544 if (GET_CODE (disp) == LABEL_REF)
5546 if (GET_CODE (disp) == CONST
5547 && GET_CODE (XEXP (disp, 0)) == PLUS
5548 && ((GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
5549 && ix86_cmodel == CM_SMALL_PIC
5550 && (CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (disp, 0), 0))
5551 || SYMBOL_REF_FLAG (XEXP (XEXP (disp, 0), 0))))
5552 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
5553 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT
5554 && INTVAL (XEXP (XEXP (disp, 0), 1)) < 16*1024*1024
5555 && INTVAL (XEXP (XEXP (disp, 0), 1)) >= -16*1024*1024)
5558 if (GET_CODE (disp) != CONST)
5560 disp = XEXP (disp, 0);
5564 /* We are unsafe to allow PLUS expressions. This limit allowed distance
5565 of GOT tables. We should not need these anyway. */
5566 if (GET_CODE (disp) != UNSPEC
5567 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5570 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5571 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5577 if (GET_CODE (disp) == PLUS)
5579 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5581 disp = XEXP (disp, 0);
5585 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5586 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5588 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5589 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5590 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5592 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5593 if (strstr (sym_name, "$pb") != 0)
5598 if (GET_CODE (disp) != UNSPEC)
5601 switch (XINT (disp, 1))
5606 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5608 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5609 case UNSPEC_GOTTPOFF:
5610 case UNSPEC_GOTNTPOFF:
5611 case UNSPEC_INDNTPOFF:
5614 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5616 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5618 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5624 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5625 memory address for an instruction. The MODE argument is the machine mode
5626 for the MEM expression that wants to use this address.
5628 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
5629 convert common non-canonical forms to canonical form so that they will
5633 legitimate_address_p (mode, addr, strict)
5634 enum machine_mode mode;
5638 struct ix86_address parts;
5639 rtx base, index, disp;
5640 HOST_WIDE_INT scale;
5641 const char *reason = NULL;
5642 rtx reason_rtx = NULL_RTX;
5644 if (TARGET_DEBUG_ADDR)
5647 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5648 GET_MODE_NAME (mode), strict);
5652 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
5654 if (TARGET_DEBUG_ADDR)
5655 fprintf (stderr, "Success.\n");
5659 if (ix86_decompose_address (addr, &parts) <= 0)
5661 reason = "decomposition failed";
5666 index = parts.index;
5668 scale = parts.scale;
5670 /* Validate base register.
5672 Don't allow SUBREG's here, it can lead to spill failures when the base
5673 is one word out of a two word structure, which is represented internally
5681 if (GET_CODE (base) == SUBREG)
5682 reg = SUBREG_REG (base);
5686 if (GET_CODE (reg) != REG)
5688 reason = "base is not a register";
5692 if (GET_MODE (base) != Pmode)
5694 reason = "base is not in Pmode";
5698 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5699 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5701 reason = "base is not valid";
5706 /* Validate index register.
5708 Don't allow SUBREG's here, it can lead to spill failures when the index
5709 is one word out of a two word structure, which is represented internally
5717 if (GET_CODE (index) == SUBREG)
5718 reg = SUBREG_REG (index);
5722 if (GET_CODE (reg) != REG)
5724 reason = "index is not a register";
5728 if (GET_MODE (index) != Pmode)
5730 reason = "index is not in Pmode";
5734 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5735 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5737 reason = "index is not valid";
5742 /* Validate scale factor. */
5745 reason_rtx = GEN_INT (scale);
5748 reason = "scale without index";
5752 if (scale != 2 && scale != 4 && scale != 8)
5754 reason = "scale is not a valid multiplier";
5759 /* Validate displacement. */
5764 if (GET_CODE (disp) == CONST
5765 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5766 switch (XINT (XEXP (disp, 0), 1))
5770 case UNSPEC_GOTPCREL:
5773 goto is_legitimate_pic;
5775 case UNSPEC_GOTTPOFF:
5776 case UNSPEC_GOTNTPOFF:
5777 case UNSPEC_INDNTPOFF:
5783 reason = "invalid address unspec";
5787 else if (flag_pic && (SYMBOLIC_CONST (disp)
5789 && !machopic_operand_p (disp)
5794 if (TARGET_64BIT && (index || base))
5796 /* foo@dtpoff(%rX) is ok. */
5797 if (GET_CODE (disp) != CONST
5798 || GET_CODE (XEXP (disp, 0)) != PLUS
5799 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5800 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5801 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5802 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5804 reason = "non-constant pic memory reference";
5808 else if (! legitimate_pic_address_disp_p (disp))
5810 reason = "displacement is an invalid pic construct";
5814 /* This code used to verify that a symbolic pic displacement
5815 includes the pic_offset_table_rtx register.
5817 While this is good idea, unfortunately these constructs may
5818 be created by "adds using lea" optimization for incorrect
5827 This code is nonsensical, but results in addressing
5828 GOT table with pic_offset_table_rtx base. We can't
5829 just refuse it easily, since it gets matched by
5830 "addsi3" pattern, that later gets split to lea in the
5831 case output register differs from input. While this
5832 can be handled by separate addsi pattern for this case
5833 that never results in lea, this seems to be easier and
5834 correct fix for crash to disable this test. */
5836 else if (!CONSTANT_ADDRESS_P (disp))
5838 reason = "displacement is not constant";
5841 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
5843 reason = "displacement is out of range";
5846 else if (!TARGET_64BIT && GET_CODE (disp) == CONST_DOUBLE)
5848 reason = "displacement is a const_double";
5853 /* Everything looks valid. */
5854 if (TARGET_DEBUG_ADDR)
5855 fprintf (stderr, "Success.\n");
5859 if (TARGET_DEBUG_ADDR)
5861 fprintf (stderr, "Error: %s\n", reason);
5862 debug_rtx (reason_rtx);
5867 /* Return an unique alias set for the GOT. */
5869 static HOST_WIDE_INT
5870 ix86_GOT_alias_set ()
5872 static HOST_WIDE_INT set = -1;
5874 set = new_alias_set ();
5878 /* Return a legitimate reference for ORIG (an address) using the
5879 register REG. If REG is 0, a new pseudo is generated.
5881 There are two types of references that must be handled:
5883 1. Global data references must load the address from the GOT, via
5884 the PIC reg. An insn is emitted to do this load, and the reg is
5887 2. Static data references, constant pool addresses, and code labels
5888 compute the address as an offset from the GOT, whose base is in
5889 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
5890 differentiate them from global data objects. The returned
5891 address is the PIC reg + an unspec constant.
5893 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5894 reg also appears in the address. */
5897 legitimize_pic_address (orig, reg)
5907 reg = gen_reg_rtx (Pmode);
5908 /* Use the generic Mach-O PIC machinery. */
5909 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5912 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
5914 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
5916 /* This symbol may be referenced via a displacement from the PIC
5917 base address (@GOTOFF). */
5919 if (reload_in_progress)
5920 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5921 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5922 new = gen_rtx_CONST (Pmode, new);
5923 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5927 emit_move_insn (reg, new);
5931 else if (GET_CODE (addr) == SYMBOL_REF)
5935 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5936 new = gen_rtx_CONST (Pmode, new);
5937 new = gen_rtx_MEM (Pmode, new);
5938 RTX_UNCHANGING_P (new) = 1;
5939 set_mem_alias_set (new, ix86_GOT_alias_set ());
5942 reg = gen_reg_rtx (Pmode);
5943 /* Use directly gen_movsi, otherwise the address is loaded
5944 into register for CSE. We don't want to CSE this addresses,
5945 instead we CSE addresses from the GOT table, so skip this. */
5946 emit_insn (gen_movsi (reg, new));
5951 /* This symbol must be referenced via a load from the
5952 Global Offset Table (@GOT). */
5954 if (reload_in_progress)
5955 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5956 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5957 new = gen_rtx_CONST (Pmode, new);
5958 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5959 new = gen_rtx_MEM (Pmode, new);
5960 RTX_UNCHANGING_P (new) = 1;
5961 set_mem_alias_set (new, ix86_GOT_alias_set ());
5964 reg = gen_reg_rtx (Pmode);
5965 emit_move_insn (reg, new);
5971 if (GET_CODE (addr) == CONST)
5973 addr = XEXP (addr, 0);
5975 /* We must match stuff we generate before. Assume the only
5976 unspecs that can get here are ours. Not that we could do
5977 anything with them anyway... */
5978 if (GET_CODE (addr) == UNSPEC
5979 || (GET_CODE (addr) == PLUS
5980 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5982 if (GET_CODE (addr) != PLUS)
5985 if (GET_CODE (addr) == PLUS)
5987 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5989 /* Check first to see if this is a constant offset from a @GOTOFF
5990 symbol reference. */
5991 if (local_symbolic_operand (op0, Pmode)
5992 && GET_CODE (op1) == CONST_INT)
5996 if (reload_in_progress)
5997 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5998 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6000 new = gen_rtx_PLUS (Pmode, new, op1);
6001 new = gen_rtx_CONST (Pmode, new);
6002 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6006 emit_move_insn (reg, new);
6012 if (INTVAL (op1) < -16*1024*1024
6013 || INTVAL (op1) >= 16*1024*1024)
6014 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
6019 base = legitimize_pic_address (XEXP (addr, 0), reg);
6020 new = legitimize_pic_address (XEXP (addr, 1),
6021 base == reg ? NULL_RTX : reg);
6023 if (GET_CODE (new) == CONST_INT)
6024 new = plus_constant (base, INTVAL (new));
6027 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6029 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6030 new = XEXP (new, 1);
6032 new = gen_rtx_PLUS (Pmode, base, new);
6041 ix86_encode_section_info (decl, first)
6043 int first ATTRIBUTE_UNUSED;
6045 bool local_p = (*targetm.binds_local_p) (decl);
6048 rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
6049 if (GET_CODE (rtl) != MEM)
6051 symbol = XEXP (rtl, 0);
6052 if (GET_CODE (symbol) != SYMBOL_REF)
6055 /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
6056 symbol so that we may access it directly in the GOT. */
6059 SYMBOL_REF_FLAG (symbol) = local_p;
6061 /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
6062 "local dynamic", "initial exec" or "local exec" TLS models
6065 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
6067 const char *symbol_str;
6070 enum tls_model kind = decl_tls_model (decl);
6072 if (TARGET_64BIT && ! flag_pic)
6074 /* x86-64 doesn't allow non-pic code for shared libraries,
6075 so don't generate GD/LD TLS models for non-pic code. */
6078 case TLS_MODEL_GLOBAL_DYNAMIC:
6079 kind = TLS_MODEL_INITIAL_EXEC; break;
6080 case TLS_MODEL_LOCAL_DYNAMIC:
6081 kind = TLS_MODEL_LOCAL_EXEC; break;
6087 symbol_str = XSTR (symbol, 0);
6089 if (symbol_str[0] == '%')
6091 if (symbol_str[1] == tls_model_chars[kind])
6095 len = strlen (symbol_str) + 1;
6096 newstr = alloca (len + 2);
6099 newstr[1] = tls_model_chars[kind];
6100 memcpy (newstr + 2, symbol_str, len);
6102 XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
6106 /* Undo the above when printing symbol names. */
6109 ix86_strip_name_encoding (str)
6119 /* Load the thread pointer into a register. */
6122 get_thread_pointer ()
6126 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6127 tp = gen_rtx_MEM (Pmode, tp);
6128 RTX_UNCHANGING_P (tp) = 1;
6129 set_mem_alias_set (tp, ix86_GOT_alias_set ());
6130 tp = force_reg (Pmode, tp);
6135 /* Try machine-dependent ways of modifying an illegitimate address
6136 to be legitimate. If we find one, return the new, valid address.
6137 This macro is used in only one place: `memory_address' in explow.c.
6139 OLDX is the address as it was before break_out_memory_refs was called.
6140 In some cases it is useful to look at this to decide what needs to be done.
6142 MODE and WIN are passed so that this macro can use
6143 GO_IF_LEGITIMATE_ADDRESS.
6145 It is always safe for this macro to do nothing. It exists to recognize
6146 opportunities to optimize the output.
6148 For the 80386, we handle X+REG by loading X into a register R and
6149 using R+REG. R will go in a general reg and indexing will be used.
6150 However, if REG is a broken-out memory address or multiplication,
6151 nothing needs to be done because REG can certainly go in a general reg.
6153 When -fpic is used, special handling is needed for symbolic references.
6154 See comments by legitimize_pic_address in i386.c for details. */
6157 legitimize_address (x, oldx, mode)
6159 register rtx oldx ATTRIBUTE_UNUSED;
6160 enum machine_mode mode;
6165 if (TARGET_DEBUG_ADDR)
6167 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6168 GET_MODE_NAME (mode));
6172 log = tls_symbolic_operand (x, mode);
6175 rtx dest, base, off, pic;
6180 case TLS_MODEL_GLOBAL_DYNAMIC:
6181 dest = gen_reg_rtx (Pmode);
6184 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6187 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6188 insns = get_insns ();
6191 emit_libcall_block (insns, dest, rax, x);
6194 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6197 case TLS_MODEL_LOCAL_DYNAMIC:
6198 base = gen_reg_rtx (Pmode);
6201 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6204 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6205 insns = get_insns ();
6208 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6209 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6210 emit_libcall_block (insns, base, rax, note);
6213 emit_insn (gen_tls_local_dynamic_base_32 (base));
6215 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6216 off = gen_rtx_CONST (Pmode, off);
6218 return gen_rtx_PLUS (Pmode, base, off);
6220 case TLS_MODEL_INITIAL_EXEC:
6224 type = UNSPEC_GOTNTPOFF;
6228 if (reload_in_progress)
6229 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6230 pic = pic_offset_table_rtx;
6231 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6233 else if (!TARGET_GNU_TLS)
6235 pic = gen_reg_rtx (Pmode);
6236 emit_insn (gen_set_got (pic));
6237 type = UNSPEC_GOTTPOFF;
6242 type = UNSPEC_INDNTPOFF;
6245 base = get_thread_pointer ();
6247 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6248 off = gen_rtx_CONST (Pmode, off);
6250 off = gen_rtx_PLUS (Pmode, pic, off);
6251 off = gen_rtx_MEM (Pmode, off);
6252 RTX_UNCHANGING_P (off) = 1;
6253 set_mem_alias_set (off, ix86_GOT_alias_set ());
6254 dest = gen_reg_rtx (Pmode);
6256 if (TARGET_64BIT || TARGET_GNU_TLS)
6258 emit_move_insn (dest, off);
6259 return gen_rtx_PLUS (Pmode, base, dest);
6262 emit_insn (gen_subsi3 (dest, base, off));
6265 case TLS_MODEL_LOCAL_EXEC:
6266 base = get_thread_pointer ();
6268 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6269 (TARGET_64BIT || TARGET_GNU_TLS)
6270 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6271 off = gen_rtx_CONST (Pmode, off);
6273 if (TARGET_64BIT || TARGET_GNU_TLS)
6274 return gen_rtx_PLUS (Pmode, base, off);
6277 dest = gen_reg_rtx (Pmode);
6278 emit_insn (gen_subsi3 (dest, base, off));
6289 if (flag_pic && SYMBOLIC_CONST (x))
6290 return legitimize_pic_address (x, 0);
6292 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6293 if (GET_CODE (x) == ASHIFT
6294 && GET_CODE (XEXP (x, 1)) == CONST_INT
6295 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6298 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6299 GEN_INT (1 << log));
6302 if (GET_CODE (x) == PLUS)
6304 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6306 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6307 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6308 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6311 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6312 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6313 GEN_INT (1 << log));
6316 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6317 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6318 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6321 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6322 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6323 GEN_INT (1 << log));
6326 /* Put multiply first if it isn't already. */
6327 if (GET_CODE (XEXP (x, 1)) == MULT)
6329 rtx tmp = XEXP (x, 0);
6330 XEXP (x, 0) = XEXP (x, 1);
6335 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6336 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6337 created by virtual register instantiation, register elimination, and
6338 similar optimizations. */
6339 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6342 x = gen_rtx_PLUS (Pmode,
6343 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6344 XEXP (XEXP (x, 1), 0)),
6345 XEXP (XEXP (x, 1), 1));
6349 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6350 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6351 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6352 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6353 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6354 && CONSTANT_P (XEXP (x, 1)))
6357 rtx other = NULL_RTX;
6359 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6361 constant = XEXP (x, 1);
6362 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6364 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6366 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6367 other = XEXP (x, 1);
6375 x = gen_rtx_PLUS (Pmode,
6376 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6377 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6378 plus_constant (other, INTVAL (constant)));
6382 if (changed && legitimate_address_p (mode, x, FALSE))
6385 if (GET_CODE (XEXP (x, 0)) == MULT)
6388 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6391 if (GET_CODE (XEXP (x, 1)) == MULT)
6394 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6398 && GET_CODE (XEXP (x, 1)) == REG
6399 && GET_CODE (XEXP (x, 0)) == REG)
6402 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6405 x = legitimize_pic_address (x, 0);
6408 if (changed && legitimate_address_p (mode, x, FALSE))
6411 if (GET_CODE (XEXP (x, 0)) == REG)
6413 register rtx temp = gen_reg_rtx (Pmode);
6414 register rtx val = force_operand (XEXP (x, 1), temp);
6416 emit_move_insn (temp, val);
6422 else if (GET_CODE (XEXP (x, 1)) == REG)
6424 register rtx temp = gen_reg_rtx (Pmode);
6425 register rtx val = force_operand (XEXP (x, 0), temp);
6427 emit_move_insn (temp, val);
6437 /* Print an integer constant expression in assembler syntax. Addition
6438 and subtraction are the only arithmetic that may appear in these
6439 expressions. FILE is the stdio stream to write to, X is the rtx, and
6440 CODE is the operand print code from the output string. */
6443 output_pic_addr_const (file, x, code)
6450 switch (GET_CODE (x))
6460 assemble_name (file, XSTR (x, 0));
6461 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_FLAG (x))
6462 fputs ("@PLT", file);
6469 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6470 assemble_name (asm_out_file, buf);
6474 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6478 /* This used to output parentheses around the expression,
6479 but that does not work on the 386 (either ATT or BSD assembler). */
6480 output_pic_addr_const (file, XEXP (x, 0), code);
6484 if (GET_MODE (x) == VOIDmode)
6486 /* We can use %d if the number is <32 bits and positive. */
6487 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6488 fprintf (file, "0x%lx%08lx",
6489 (unsigned long) CONST_DOUBLE_HIGH (x),
6490 (unsigned long) CONST_DOUBLE_LOW (x));
6492 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6495 /* We can't handle floating point constants;
6496 PRINT_OPERAND must handle them. */
6497 output_operand_lossage ("floating constant misused");
6501 /* Some assemblers need integer constants to appear first. */
6502 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6504 output_pic_addr_const (file, XEXP (x, 0), code);
6506 output_pic_addr_const (file, XEXP (x, 1), code);
6508 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6510 output_pic_addr_const (file, XEXP (x, 1), code);
6512 output_pic_addr_const (file, XEXP (x, 0), code);
6520 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6521 output_pic_addr_const (file, XEXP (x, 0), code);
6523 output_pic_addr_const (file, XEXP (x, 1), code);
6525 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6529 if (XVECLEN (x, 0) != 1)
6531 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6532 switch (XINT (x, 1))
6535 fputs ("@GOT", file);
6538 fputs ("@GOTOFF", file);
6540 case UNSPEC_GOTPCREL:
6541 fputs ("@GOTPCREL(%rip)", file);
6543 case UNSPEC_GOTTPOFF:
6544 /* FIXME: This might be @TPOFF in Sun ld too. */
6545 fputs ("@GOTTPOFF", file);
6548 fputs ("@TPOFF", file);
6552 fputs ("@TPOFF", file);
6554 fputs ("@NTPOFF", file);
6557 fputs ("@DTPOFF", file);
6559 case UNSPEC_GOTNTPOFF:
6561 fputs ("@GOTTPOFF(%rip)", file);
6563 fputs ("@GOTNTPOFF", file);
6565 case UNSPEC_INDNTPOFF:
6566 fputs ("@INDNTPOFF", file);
6569 output_operand_lossage ("invalid UNSPEC as operand");
6575 output_operand_lossage ("invalid expression as operand");
6579 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6580 We need to handle our special PIC relocations. */
6583 i386_dwarf_output_addr_const (file, x)
6588 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6592 fprintf (file, "%s", ASM_LONG);
6595 output_pic_addr_const (file, x, '\0');
6597 output_addr_const (file, x);
6601 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6602 We need to emit DTP-relative relocations. */
6605 i386_output_dwarf_dtprel (file, size, x)
6610 fputs (ASM_LONG, file);
6611 output_addr_const (file, x);
6612 fputs ("@DTPOFF", file);
6618 fputs (", 0", file);
6625 /* In the name of slightly smaller debug output, and to cater to
6626 general assembler losage, recognize PIC+GOTOFF and turn it back
6627 into a direct symbol reference. */
6630 ix86_delegitimize_address (orig_x)
6635 if (GET_CODE (x) == MEM)
6640 if (GET_CODE (x) != CONST
6641 || GET_CODE (XEXP (x, 0)) != UNSPEC
6642 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6643 || GET_CODE (orig_x) != MEM)
6645 return XVECEXP (XEXP (x, 0), 0, 0);
6648 if (GET_CODE (x) != PLUS
6649 || GET_CODE (XEXP (x, 1)) != CONST)
6652 if (GET_CODE (XEXP (x, 0)) == REG
6653 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6654 /* %ebx + GOT/GOTOFF */
6656 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6658 /* %ebx + %reg * scale + GOT/GOTOFF */
6660 if (GET_CODE (XEXP (y, 0)) == REG
6661 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6663 else if (GET_CODE (XEXP (y, 1)) == REG
6664 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6668 if (GET_CODE (y) != REG
6669 && GET_CODE (y) != MULT
6670 && GET_CODE (y) != ASHIFT)
6676 x = XEXP (XEXP (x, 1), 0);
6677 if (GET_CODE (x) == UNSPEC
6678 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6679 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6682 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6683 return XVECEXP (x, 0, 0);
6686 if (GET_CODE (x) == PLUS
6687 && GET_CODE (XEXP (x, 0)) == UNSPEC
6688 && GET_CODE (XEXP (x, 1)) == CONST_INT
6689 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6690 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6691 && GET_CODE (orig_x) != MEM)))
6693 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6695 return gen_rtx_PLUS (Pmode, y, x);
6703 put_condition_code (code, mode, reverse, fp, file)
6705 enum machine_mode mode;
6711 if (mode == CCFPmode || mode == CCFPUmode)
6713 enum rtx_code second_code, bypass_code;
6714 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6715 if (bypass_code != NIL || second_code != NIL)
6717 code = ix86_fp_compare_code_to_integer (code);
6721 code = reverse_condition (code);
6732 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6737 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
6738 Those same assemblers have the same but opposite losage on cmov. */
6741 suffix = fp ? "nbe" : "a";
6744 if (mode == CCNOmode || mode == CCGOCmode)
6746 else if (mode == CCmode || mode == CCGCmode)
6757 if (mode == CCNOmode || mode == CCGOCmode)
6759 else if (mode == CCmode || mode == CCGCmode)
6768 suffix = fp ? "nb" : "ae";
6771 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6781 suffix = fp ? "u" : "p";
6784 suffix = fp ? "nu" : "np";
6789 fputs (suffix, file);
6793 print_reg (x, code, file)
6798 if (REGNO (x) == ARG_POINTER_REGNUM
6799 || REGNO (x) == FRAME_POINTER_REGNUM
6800 || REGNO (x) == FLAGS_REG
6801 || REGNO (x) == FPSR_REG)
6804 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6807 if (code == 'w' || MMX_REG_P (x))
6809 else if (code == 'b')
6811 else if (code == 'k')
6813 else if (code == 'q')
6815 else if (code == 'y')
6817 else if (code == 'h')
6820 code = GET_MODE_SIZE (GET_MODE (x));
6822 /* Irritatingly, AMD extended registers use different naming convention
6823 from the normal registers. */
6824 if (REX_INT_REG_P (x))
6831 error ("extended registers have no high halves");
6834 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6837 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6840 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6843 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6846 error ("unsupported operand size for extended register");
6854 if (STACK_TOP_P (x))
6856 fputs ("st(0)", file);
6863 if (! ANY_FP_REG_P (x))
6864 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6868 fputs (hi_reg_name[REGNO (x)], file);
6871 fputs (qi_reg_name[REGNO (x)], file);
6874 fputs (qi_high_reg_name[REGNO (x)], file);
6881 /* Locate some local-dynamic symbol still in use by this function
6882 so that we can print its name in some tls_local_dynamic_base
6886 get_some_local_dynamic_name ()
6890 if (cfun->machine->some_ld_name)
6891 return cfun->machine->some_ld_name;
6893 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6895 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6896 return cfun->machine->some_ld_name;
6902 get_some_local_dynamic_name_1 (px, data)
6904 void *data ATTRIBUTE_UNUSED;
6908 if (GET_CODE (x) == SYMBOL_REF
6909 && local_dynamic_symbolic_operand (x, Pmode))
6911 cfun->machine->some_ld_name = XSTR (x, 0);
6919 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6920 C -- print opcode suffix for set/cmov insn.
6921 c -- like C, but print reversed condition
6922 F,f -- likewise, but for floating-point.
6923 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
6925 R -- print the prefix for register names.
6926 z -- print the opcode suffix for the size of the current operand.
6927 * -- print a star (in certain assembler syntax)
6928 A -- print an absolute memory reference.
6929 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6930 s -- print a shift double count, followed by the assemblers argument
6932 b -- print the QImode name of the register for the indicated operand.
6933 %b0 would print %al if operands[0] is reg 0.
6934 w -- likewise, print the HImode name of the register.
6935 k -- likewise, print the SImode name of the register.
6936 q -- likewise, print the DImode name of the register.
6937 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6938 y -- print "st(0)" instead of "st" as a register.
6939 D -- print condition for SSE cmp instruction.
6940 P -- if PIC, print an @PLT suffix.
6941 X -- don't print any sort of PIC '@' suffix for a symbol.
6942 & -- print some in-use local-dynamic symbol name.
6946 print_operand (file, x, code)
6956 if (ASSEMBLER_DIALECT == ASM_ATT)
6961 assemble_name (file, get_some_local_dynamic_name ());
6965 if (ASSEMBLER_DIALECT == ASM_ATT)
6967 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6969 /* Intel syntax. For absolute addresses, registers should not
6970 be surrounded by braces. */
6971 if (GET_CODE (x) != REG)
6974 PRINT_OPERAND (file, x, 0);
6982 PRINT_OPERAND (file, x, 0);
6987 if (ASSEMBLER_DIALECT == ASM_ATT)
6992 if (ASSEMBLER_DIALECT == ASM_ATT)
6997 if (ASSEMBLER_DIALECT == ASM_ATT)
7002 if (ASSEMBLER_DIALECT == ASM_ATT)
7007 if (ASSEMBLER_DIALECT == ASM_ATT)
7012 if (ASSEMBLER_DIALECT == ASM_ATT)
7017 /* 387 opcodes don't get size suffixes if the operands are
7019 if (STACK_REG_P (x))
7022 /* Likewise if using Intel opcodes. */
7023 if (ASSEMBLER_DIALECT == ASM_INTEL)
7026 /* This is the size of op from size of operand. */
7027 switch (GET_MODE_SIZE (GET_MODE (x)))
7030 #ifdef HAVE_GAS_FILDS_FISTS
7036 if (GET_MODE (x) == SFmode)
7051 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7053 #ifdef GAS_MNEMONICS
7079 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7081 PRINT_OPERAND (file, x, 0);
7087 /* Little bit of braindamage here. The SSE compare instructions
7088 does use completely different names for the comparisons that the
7089 fp conditional moves. */
7090 switch (GET_CODE (x))
7105 fputs ("unord", file);
7109 fputs ("neq", file);
7113 fputs ("nlt", file);
7117 fputs ("nle", file);
7120 fputs ("ord", file);
7128 #ifdef CMOV_SUN_AS_SYNTAX
7129 if (ASSEMBLER_DIALECT == ASM_ATT)
7131 switch (GET_MODE (x))
7133 case HImode: putc ('w', file); break;
7135 case SFmode: putc ('l', file); break;
7137 case DFmode: putc ('q', file); break;
7145 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7148 #ifdef CMOV_SUN_AS_SYNTAX
7149 if (ASSEMBLER_DIALECT == ASM_ATT)
7152 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7155 /* Like above, but reverse condition */
7157 /* Check to see if argument to %c is really a constant
7158 and not a condition code which needs to be reversed. */
7159 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
7161 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7164 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7167 #ifdef CMOV_SUN_AS_SYNTAX
7168 if (ASSEMBLER_DIALECT == ASM_ATT)
7171 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7177 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7180 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7183 int pred_val = INTVAL (XEXP (x, 0));
7185 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7186 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7188 int taken = pred_val > REG_BR_PROB_BASE / 2;
7189 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7191 /* Emit hints only in the case default branch prediction
7192 heuristics would fail. */
7193 if (taken != cputaken)
7195 /* We use 3e (DS) prefix for taken branches and
7196 2e (CS) prefix for not taken branches. */
7198 fputs ("ds ; ", file);
7200 fputs ("cs ; ", file);
7207 output_operand_lossage ("invalid operand code `%c'", code);
7211 if (GET_CODE (x) == REG)
7213 PRINT_REG (x, code, file);
7216 else if (GET_CODE (x) == MEM)
7218 /* No `byte ptr' prefix for call instructions. */
7219 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7222 switch (GET_MODE_SIZE (GET_MODE (x)))
7224 case 1: size = "BYTE"; break;
7225 case 2: size = "WORD"; break;
7226 case 4: size = "DWORD"; break;
7227 case 8: size = "QWORD"; break;
7228 case 12: size = "XWORD"; break;
7229 case 16: size = "XMMWORD"; break;
7234 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7237 else if (code == 'w')
7239 else if (code == 'k')
7243 fputs (" PTR ", file);
7247 if (flag_pic && CONSTANT_ADDRESS_P (x))
7248 output_pic_addr_const (file, x, code);
7249 /* Avoid (%rip) for call operands. */
7250 else if (CONSTANT_ADDRESS_P (x) && code == 'P'
7251 && GET_CODE (x) != CONST_INT)
7252 output_addr_const (file, x);
7253 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7254 output_operand_lossage ("invalid constraints for operand");
7259 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7264 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7265 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7267 if (ASSEMBLER_DIALECT == ASM_ATT)
7269 fprintf (file, "0x%lx", l);
7272 /* These float cases don't actually occur as immediate operands. */
7273 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7277 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7278 fprintf (file, "%s", dstr);
7281 else if (GET_CODE (x) == CONST_DOUBLE
7282 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
7286 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7287 fprintf (file, "%s", dstr);
7294 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7296 if (ASSEMBLER_DIALECT == ASM_ATT)
7299 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7300 || GET_CODE (x) == LABEL_REF)
7302 if (ASSEMBLER_DIALECT == ASM_ATT)
7305 fputs ("OFFSET FLAT:", file);
7308 if (GET_CODE (x) == CONST_INT)
7309 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7311 output_pic_addr_const (file, x, code);
7313 output_addr_const (file, x);
7317 /* Print a memory operand whose address is ADDR. */
7320 print_operand_address (file, addr)
7324 struct ix86_address parts;
7325 rtx base, index, disp;
7328 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
7330 if (ASSEMBLER_DIALECT == ASM_INTEL)
7331 fputs ("DWORD PTR ", file);
7332 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7335 fputs ("fs:0", file);
7337 fputs ("gs:0", file);
7341 if (! ix86_decompose_address (addr, &parts))
7345 index = parts.index;
7347 scale = parts.scale;
7349 if (!base && !index)
7351 /* Displacement only requires special attention. */
7353 if (GET_CODE (disp) == CONST_INT)
7355 if (ASSEMBLER_DIALECT == ASM_INTEL)
7357 if (USER_LABEL_PREFIX[0] == 0)
7359 fputs ("ds:", file);
7361 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
7364 output_pic_addr_const (file, addr, 0);
7366 output_addr_const (file, addr);
7368 /* Use one byte shorter RIP relative addressing for 64bit mode. */
7370 && ((GET_CODE (addr) == SYMBOL_REF
7371 && ! tls_symbolic_operand (addr, GET_MODE (addr)))
7372 || GET_CODE (addr) == LABEL_REF
7373 || (GET_CODE (addr) == CONST
7374 && GET_CODE (XEXP (addr, 0)) == PLUS
7375 && (GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
7376 || GET_CODE (XEXP (XEXP (addr, 0), 0)) == LABEL_REF)
7377 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)))
7378 fputs ("(%rip)", file);
7382 if (ASSEMBLER_DIALECT == ASM_ATT)
7387 output_pic_addr_const (file, disp, 0);
7388 else if (GET_CODE (disp) == LABEL_REF)
7389 output_asm_label (disp);
7391 output_addr_const (file, disp);
7396 PRINT_REG (base, 0, file);
7400 PRINT_REG (index, 0, file);
7402 fprintf (file, ",%d", scale);
7408 rtx offset = NULL_RTX;
7412 /* Pull out the offset of a symbol; print any symbol itself. */
7413 if (GET_CODE (disp) == CONST
7414 && GET_CODE (XEXP (disp, 0)) == PLUS
7415 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7417 offset = XEXP (XEXP (disp, 0), 1);
7418 disp = gen_rtx_CONST (VOIDmode,
7419 XEXP (XEXP (disp, 0), 0));
7423 output_pic_addr_const (file, disp, 0);
7424 else if (GET_CODE (disp) == LABEL_REF)
7425 output_asm_label (disp);
7426 else if (GET_CODE (disp) == CONST_INT)
7429 output_addr_const (file, disp);
7435 PRINT_REG (base, 0, file);
7438 if (INTVAL (offset) >= 0)
7440 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7444 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7451 PRINT_REG (index, 0, file);
7453 fprintf (file, "*%d", scale);
7461 output_addr_const_extra (file, x)
7467 if (GET_CODE (x) != UNSPEC)
7470 op = XVECEXP (x, 0, 0);
7471 switch (XINT (x, 1))
7473 case UNSPEC_GOTTPOFF:
7474 output_addr_const (file, op);
7475 /* FIXME: This might be @TPOFF in Sun ld. */
7476 fputs ("@GOTTPOFF", file);
7479 output_addr_const (file, op);
7480 fputs ("@TPOFF", file);
7483 output_addr_const (file, op);
7485 fputs ("@TPOFF", file);
7487 fputs ("@NTPOFF", file);
7490 output_addr_const (file, op);
7491 fputs ("@DTPOFF", file);
7493 case UNSPEC_GOTNTPOFF:
7494 output_addr_const (file, op);
7496 fputs ("@GOTTPOFF(%rip)", file);
7498 fputs ("@GOTNTPOFF", file);
7500 case UNSPEC_INDNTPOFF:
7501 output_addr_const (file, op);
7502 fputs ("@INDNTPOFF", file);
7512 /* Split one or more DImode RTL references into pairs of SImode
7513 references. The RTL can be REG, offsettable MEM, integer constant, or
7514 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7515 split and "num" is its length. lo_half and hi_half are output arrays
7516 that parallel "operands". */
7519 split_di (operands, num, lo_half, hi_half)
7522 rtx lo_half[], hi_half[];
7526 rtx op = operands[num];
7528 /* simplify_subreg refuse to split volatile memory addresses,
7529 but we still have to handle it. */
7530 if (GET_CODE (op) == MEM)
7532 lo_half[num] = adjust_address (op, SImode, 0);
7533 hi_half[num] = adjust_address (op, SImode, 4);
7537 lo_half[num] = simplify_gen_subreg (SImode, op,
7538 GET_MODE (op) == VOIDmode
7539 ? DImode : GET_MODE (op), 0);
7540 hi_half[num] = simplify_gen_subreg (SImode, op,
7541 GET_MODE (op) == VOIDmode
7542 ? DImode : GET_MODE (op), 4);
7546 /* Split one or more TImode RTL references into pairs of SImode
7547 references. The RTL can be REG, offsettable MEM, integer constant, or
7548 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7549 split and "num" is its length. lo_half and hi_half are output arrays
7550 that parallel "operands". */
7553 split_ti (operands, num, lo_half, hi_half)
7556 rtx lo_half[], hi_half[];
7560 rtx op = operands[num];
7562 /* simplify_subreg refuse to split volatile memory addresses, but we
7563 still have to handle it. */
7564 if (GET_CODE (op) == MEM)
7566 lo_half[num] = adjust_address (op, DImode, 0);
7567 hi_half[num] = adjust_address (op, DImode, 8);
7571 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7572 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7577 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7578 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7579 is the expression of the binary operation. The output may either be
7580 emitted here, or returned to the caller, like all output_* functions.
7582 There is no guarantee that the operands are the same mode, as they
7583 might be within FLOAT or FLOAT_EXTEND expressions. */
7585 #ifndef SYSV386_COMPAT
7586 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7587 wants to fix the assemblers because that causes incompatibility
7588 with gcc. No-one wants to fix gcc because that causes
7589 incompatibility with assemblers... You can use the option of
7590 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7591 #define SYSV386_COMPAT 1
7595 output_387_binary_op (insn, operands)
7599 static char buf[30];
7602 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7604 #ifdef ENABLE_CHECKING
7605 /* Even if we do not want to check the inputs, this documents input
7606 constraints. Which helps in understanding the following code. */
7607 if (STACK_REG_P (operands[0])
7608 && ((REG_P (operands[1])
7609 && REGNO (operands[0]) == REGNO (operands[1])
7610 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7611 || (REG_P (operands[2])
7612 && REGNO (operands[0]) == REGNO (operands[2])
7613 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7614 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7620 switch (GET_CODE (operands[3]))
7623 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7624 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7632 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7633 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7641 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7642 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7650 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7651 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7665 if (GET_MODE (operands[0]) == SFmode)
7666 strcat (buf, "ss\t{%2, %0|%0, %2}");
7668 strcat (buf, "sd\t{%2, %0|%0, %2}");
7673 switch (GET_CODE (operands[3]))
7677 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7679 rtx temp = operands[2];
7680 operands[2] = operands[1];
7684 /* know operands[0] == operands[1]. */
7686 if (GET_CODE (operands[2]) == MEM)
7692 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7694 if (STACK_TOP_P (operands[0]))
7695 /* How is it that we are storing to a dead operand[2]?
7696 Well, presumably operands[1] is dead too. We can't
7697 store the result to st(0) as st(0) gets popped on this
7698 instruction. Instead store to operands[2] (which I
7699 think has to be st(1)). st(1) will be popped later.
7700 gcc <= 2.8.1 didn't have this check and generated
7701 assembly code that the Unixware assembler rejected. */
7702 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7704 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7708 if (STACK_TOP_P (operands[0]))
7709 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7711 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7716 if (GET_CODE (operands[1]) == MEM)
7722 if (GET_CODE (operands[2]) == MEM)
7728 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7731 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7732 derived assemblers, confusingly reverse the direction of
7733 the operation for fsub{r} and fdiv{r} when the
7734 destination register is not st(0). The Intel assembler
7735 doesn't have this brain damage. Read !SYSV386_COMPAT to
7736 figure out what the hardware really does. */
7737 if (STACK_TOP_P (operands[0]))
7738 p = "{p\t%0, %2|rp\t%2, %0}";
7740 p = "{rp\t%2, %0|p\t%0, %2}";
7742 if (STACK_TOP_P (operands[0]))
7743 /* As above for fmul/fadd, we can't store to st(0). */
7744 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7746 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7751 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7754 if (STACK_TOP_P (operands[0]))
7755 p = "{rp\t%0, %1|p\t%1, %0}";
7757 p = "{p\t%1, %0|rp\t%0, %1}";
7759 if (STACK_TOP_P (operands[0]))
7760 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7762 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7767 if (STACK_TOP_P (operands[0]))
7769 if (STACK_TOP_P (operands[1]))
7770 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7772 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7775 else if (STACK_TOP_P (operands[1]))
7778 p = "{\t%1, %0|r\t%0, %1}";
7780 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7786 p = "{r\t%2, %0|\t%0, %2}";
7788 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7801 /* Output code to initialize control word copies used by
7802 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
7803 is set to control word rounding downwards. */
7805 emit_i387_cw_initialization (normal, round_down)
7806 rtx normal, round_down;
7808 rtx reg = gen_reg_rtx (HImode);
7810 emit_insn (gen_x86_fnstcw_1 (normal));
7811 emit_move_insn (reg, normal);
7812 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7814 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7816 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7817 emit_move_insn (round_down, reg);
7820 /* Output code for INSN to convert a float to a signed int. OPERANDS
7821 are the insn operands. The output may be [HSD]Imode and the input
7822 operand may be [SDX]Fmode. */
7825 output_fix_trunc (insn, operands)
7829 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7830 int dimode_p = GET_MODE (operands[0]) == DImode;
7832 /* Jump through a hoop or two for DImode, since the hardware has no
7833 non-popping instruction. We used to do this a different way, but
7834 that was somewhat fragile and broke with post-reload splitters. */
7835 if (dimode_p && !stack_top_dies)
7836 output_asm_insn ("fld\t%y1", operands);
7838 if (!STACK_TOP_P (operands[1]))
7841 if (GET_CODE (operands[0]) != MEM)
7844 output_asm_insn ("fldcw\t%3", operands);
7845 if (stack_top_dies || dimode_p)
7846 output_asm_insn ("fistp%z0\t%0", operands);
7848 output_asm_insn ("fist%z0\t%0", operands);
7849 output_asm_insn ("fldcw\t%2", operands);
7854 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7855 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7856 when fucom should be used. */
7859 output_fp_compare (insn, operands, eflags_p, unordered_p)
7862 int eflags_p, unordered_p;
7865 rtx cmp_op0 = operands[0];
7866 rtx cmp_op1 = operands[1];
7867 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
7872 cmp_op1 = operands[2];
7876 if (GET_MODE (operands[0]) == SFmode)
7878 return "ucomiss\t{%1, %0|%0, %1}";
7880 return "comiss\t{%1, %0|%0, %1}";
7883 return "ucomisd\t{%1, %0|%0, %1}";
7885 return "comisd\t{%1, %0|%0, %1}";
7888 if (! STACK_TOP_P (cmp_op0))
7891 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7893 if (STACK_REG_P (cmp_op1)
7895 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7896 && REGNO (cmp_op1) != FIRST_STACK_REG)
7898 /* If both the top of the 387 stack dies, and the other operand
7899 is also a stack register that dies, then this must be a
7900 `fcompp' float compare */
7904 /* There is no double popping fcomi variant. Fortunately,
7905 eflags is immune from the fstp's cc clobbering. */
7907 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7909 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7917 return "fucompp\n\tfnstsw\t%0";
7919 return "fcompp\n\tfnstsw\t%0";
7932 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7934 static const char * const alt[24] =
7946 "fcomi\t{%y1, %0|%0, %y1}",
7947 "fcomip\t{%y1, %0|%0, %y1}",
7948 "fucomi\t{%y1, %0|%0, %y1}",
7949 "fucomip\t{%y1, %0|%0, %y1}",
7956 "fcom%z2\t%y2\n\tfnstsw\t%0",
7957 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7958 "fucom%z2\t%y2\n\tfnstsw\t%0",
7959 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7961 "ficom%z2\t%y2\n\tfnstsw\t%0",
7962 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7970 mask = eflags_p << 3;
7971 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7972 mask |= unordered_p << 1;
7973 mask |= stack_top_dies;
7986 ix86_output_addr_vec_elt (file, value)
7990 const char *directive = ASM_LONG;
7995 directive = ASM_QUAD;
8001 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8005 ix86_output_addr_diff_elt (file, value, rel)
8010 fprintf (file, "%s%s%d-%s%d\n",
8011 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8012 else if (HAVE_AS_GOTOFF_IN_DATA)
8013 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8015 else if (TARGET_MACHO)
8016 fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
8017 machopic_function_base_name () + 1);
8020 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8021 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8024 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8028 ix86_expand_clear (dest)
8033 /* We play register width games, which are only valid after reload. */
8034 if (!reload_completed)
8037 /* Avoid HImode and its attendant prefix byte. */
8038 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8039 dest = gen_rtx_REG (SImode, REGNO (dest));
8041 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8043 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8044 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8046 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8047 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8053 /* X is an unchanging MEM. If it is a constant pool reference, return
8054 the constant pool rtx, else NULL. */
8057 maybe_get_pool_constant (x)
8060 x = ix86_delegitimize_address (XEXP (x, 0));
8062 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8063 return get_pool_constant (x);
8069 ix86_expand_move (mode, operands)
8070 enum machine_mode mode;
8073 int strict = (reload_in_progress || reload_completed);
8074 rtx insn, op0, op1, tmp;
8079 if (tls_symbolic_operand (op1, Pmode))
8081 op1 = legitimize_address (op1, op1, VOIDmode);
8082 if (GET_CODE (op0) == MEM)
8084 tmp = gen_reg_rtx (mode);
8085 emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
8089 else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8094 rtx temp = ((reload_in_progress
8095 || ((op0 && GET_CODE (op0) == REG)
8097 ? op0 : gen_reg_rtx (Pmode));
8098 op1 = machopic_indirect_data_reference (op1, temp);
8099 op1 = machopic_legitimize_pic_address (op1, mode,
8100 temp == op1 ? 0 : temp);
8104 if (MACHOPIC_INDIRECT)
8105 op1 = machopic_indirect_data_reference (op1, 0);
8109 insn = gen_rtx_SET (VOIDmode, op0, op1);
8113 #endif /* TARGET_MACHO */
8114 if (GET_CODE (op0) == MEM)
8115 op1 = force_reg (Pmode, op1);
8119 if (GET_CODE (temp) != REG)
8120 temp = gen_reg_rtx (Pmode);
8121 temp = legitimize_pic_address (op1, temp);
8129 if (GET_CODE (op0) == MEM
8130 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8131 || !push_operand (op0, mode))
8132 && GET_CODE (op1) == MEM)
8133 op1 = force_reg (mode, op1);
8135 if (push_operand (op0, mode)
8136 && ! general_no_elim_operand (op1, mode))
8137 op1 = copy_to_mode_reg (mode, op1);
8139 /* Force large constants in 64bit compilation into register
8140 to get them CSEed. */
8141 if (TARGET_64BIT && mode == DImode
8142 && immediate_operand (op1, mode)
8143 && !x86_64_zero_extended_value (op1)
8144 && !register_operand (op0, mode)
8145 && optimize && !reload_completed && !reload_in_progress)
8146 op1 = copy_to_mode_reg (mode, op1);
8148 if (FLOAT_MODE_P (mode))
8150 /* If we are loading a floating point constant to a register,
8151 force the value to memory now, since we'll get better code
8152 out the back end. */
8156 else if (GET_CODE (op1) == CONST_DOUBLE
8157 && register_operand (op0, mode))
8158 op1 = validize_mem (force_const_mem (mode, op1));
8162 insn = gen_rtx_SET (VOIDmode, op0, op1);
8168 ix86_expand_vector_move (mode, operands)
8169 enum machine_mode mode;
8172 /* Force constants other than zero into memory. We do not know how
8173 the instructions used to build constants modify the upper 64 bits
8174 of the register, once we have that information we may be able
8175 to handle some of them more efficiently. */
8176 if ((reload_in_progress | reload_completed) == 0
8177 && register_operand (operands[0], mode)
8178 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
8179 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
8181 /* Make operand1 a register if it isn't already. */
8183 && !register_operand (operands[0], mode)
8184 && !register_operand (operands[1], mode))
8186 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8187 emit_move_insn (operands[0], temp);
8191 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8194 /* Attempt to expand a binary operator. Make the expansion closer to the
8195 actual machine, then just general_operand, which will allow 3 separate
8196 memory references (one output, two input) in a single insn. */
8199 ix86_expand_binary_operator (code, mode, operands)
8201 enum machine_mode mode;
8204 int matching_memory;
8205 rtx src1, src2, dst, op, clob;
8211 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8212 if (GET_RTX_CLASS (code) == 'c'
8213 && (rtx_equal_p (dst, src2)
8214 || immediate_operand (src1, mode)))
8221 /* If the destination is memory, and we do not have matching source
8222 operands, do things in registers. */
8223 matching_memory = 0;
8224 if (GET_CODE (dst) == MEM)
8226 if (rtx_equal_p (dst, src1))
8227 matching_memory = 1;
8228 else if (GET_RTX_CLASS (code) == 'c'
8229 && rtx_equal_p (dst, src2))
8230 matching_memory = 2;
8232 dst = gen_reg_rtx (mode);
8235 /* Both source operands cannot be in memory. */
8236 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8238 if (matching_memory != 2)
8239 src2 = force_reg (mode, src2);
8241 src1 = force_reg (mode, src1);
8244 /* If the operation is not commutable, source 1 cannot be a constant
8245 or non-matching memory. */
8246 if ((CONSTANT_P (src1)
8247 || (!matching_memory && GET_CODE (src1) == MEM))
8248 && GET_RTX_CLASS (code) != 'c')
8249 src1 = force_reg (mode, src1);
8251 /* If optimizing, copy to regs to improve CSE */
8252 if (optimize && ! no_new_pseudos)
8254 if (GET_CODE (dst) == MEM)
8255 dst = gen_reg_rtx (mode);
8256 if (GET_CODE (src1) == MEM)
8257 src1 = force_reg (mode, src1);
8258 if (GET_CODE (src2) == MEM)
8259 src2 = force_reg (mode, src2);
8262 /* Emit the instruction. */
8264 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8265 if (reload_in_progress)
8267 /* Reload doesn't know about the flags register, and doesn't know that
8268 it doesn't want to clobber it. We can only do this with PLUS. */
8275 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8276 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8279 /* Fix up the destination if needed. */
8280 if (dst != operands[0])
8281 emit_move_insn (operands[0], dst);
8284 /* Return TRUE or FALSE depending on whether the binary operator meets the
8285 appropriate constraints. */
8288 ix86_binary_operator_ok (code, mode, operands)
8290 enum machine_mode mode ATTRIBUTE_UNUSED;
8293 /* Both source operands cannot be in memory. */
8294 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8296 /* If the operation is not commutable, source 1 cannot be a constant. */
8297 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
8299 /* If the destination is memory, we must have a matching source operand. */
8300 if (GET_CODE (operands[0]) == MEM
8301 && ! (rtx_equal_p (operands[0], operands[1])
8302 || (GET_RTX_CLASS (code) == 'c'
8303 && rtx_equal_p (operands[0], operands[2]))))
8305 /* If the operation is not commutable and the source 1 is memory, we must
8306 have a matching destination. */
8307 if (GET_CODE (operands[1]) == MEM
8308 && GET_RTX_CLASS (code) != 'c'
8309 && ! rtx_equal_p (operands[0], operands[1]))
8314 /* Attempt to expand a unary operator. Make the expansion closer to the
8315 actual machine, then just general_operand, which will allow 2 separate
8316 memory references (one output, one input) in a single insn. */
8319 ix86_expand_unary_operator (code, mode, operands)
8321 enum machine_mode mode;
8324 int matching_memory;
8325 rtx src, dst, op, clob;
8330 /* If the destination is memory, and we do not have matching source
8331 operands, do things in registers. */
8332 matching_memory = 0;
8333 if (GET_CODE (dst) == MEM)
8335 if (rtx_equal_p (dst, src))
8336 matching_memory = 1;
8338 dst = gen_reg_rtx (mode);
8341 /* When source operand is memory, destination must match. */
8342 if (!matching_memory && GET_CODE (src) == MEM)
8343 src = force_reg (mode, src);
8345 /* If optimizing, copy to regs to improve CSE */
8346 if (optimize && ! no_new_pseudos)
8348 if (GET_CODE (dst) == MEM)
8349 dst = gen_reg_rtx (mode);
8350 if (GET_CODE (src) == MEM)
8351 src = force_reg (mode, src);
8354 /* Emit the instruction. */
8356 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8357 if (reload_in_progress || code == NOT)
8359 /* Reload doesn't know about the flags register, and doesn't know that
8360 it doesn't want to clobber it. */
8367 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8368 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8371 /* Fix up the destination if needed. */
8372 if (dst != operands[0])
8373 emit_move_insn (operands[0], dst);
8376 /* Return TRUE or FALSE depending on whether the unary operator meets the
8377 appropriate constraints. */
8380 ix86_unary_operator_ok (code, mode, operands)
8381 enum rtx_code code ATTRIBUTE_UNUSED;
8382 enum machine_mode mode ATTRIBUTE_UNUSED;
8383 rtx operands[2] ATTRIBUTE_UNUSED;
8385 /* If one of operands is memory, source and destination must match. */
8386 if ((GET_CODE (operands[0]) == MEM
8387 || GET_CODE (operands[1]) == MEM)
8388 && ! rtx_equal_p (operands[0], operands[1]))
8393 /* Return TRUE or FALSE depending on whether the first SET in INSN
8394 has source and destination with matching CC modes, and that the
8395 CC mode is at least as constrained as REQ_MODE. */
8398 ix86_match_ccmode (insn, req_mode)
8400 enum machine_mode req_mode;
8403 enum machine_mode set_mode;
8405 set = PATTERN (insn);
8406 if (GET_CODE (set) == PARALLEL)
8407 set = XVECEXP (set, 0, 0);
8408 if (GET_CODE (set) != SET)
8410 if (GET_CODE (SET_SRC (set)) != COMPARE)
8413 set_mode = GET_MODE (SET_DEST (set));
8417 if (req_mode != CCNOmode
8418 && (req_mode != CCmode
8419 || XEXP (SET_SRC (set), 1) != const0_rtx))
8423 if (req_mode == CCGCmode)
8427 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8431 if (req_mode == CCZmode)
8441 return (GET_MODE (SET_SRC (set)) == set_mode);
8444 /* Generate insn patterns to do an integer compare of OPERANDS. */
8447 ix86_expand_int_compare (code, op0, op1)
8451 enum machine_mode cmpmode;
8454 cmpmode = SELECT_CC_MODE (code, op0, op1);
8455 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8457 /* This is very simple, but making the interface the same as in the
8458 FP case makes the rest of the code easier. */
8459 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8460 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8462 /* Return the test that should be put into the flags user, i.e.
8463 the bcc, scc, or cmov instruction. */
8464 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8467 /* Figure out whether to use ordered or unordered fp comparisons.
8468 Return the appropriate mode to use. */
8471 ix86_fp_compare_mode (code)
8472 enum rtx_code code ATTRIBUTE_UNUSED;
8474 /* ??? In order to make all comparisons reversible, we do all comparisons
8475 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8476 all forms trapping and nontrapping comparisons, we can make inequality
8477 comparisons trapping again, since it results in better code when using
8478 FCOM based compares. */
8479 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8483 ix86_cc_mode (code, op0, op1)
8487 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8488 return ix86_fp_compare_mode (code);
8491 /* Only zero flag is needed. */
8493 case NE: /* ZF!=0 */
8495 /* Codes needing carry flag. */
8496 case GEU: /* CF=0 */
8497 case GTU: /* CF=0 & ZF=0 */
8498 case LTU: /* CF=1 */
8499 case LEU: /* CF=1 | ZF=1 */
8501 /* Codes possibly doable only with sign flag when
8502 comparing against zero. */
8503 case GE: /* SF=OF or SF=0 */
8504 case LT: /* SF<>OF or SF=1 */
8505 if (op1 == const0_rtx)
8508 /* For other cases Carry flag is not required. */
8510 /* Codes doable only with sign flag when comparing
8511 against zero, but we miss jump instruction for it
8512 so we need to use relational tests against overflow
8513 that thus needs to be zero. */
8514 case GT: /* ZF=0 & SF=OF */
8515 case LE: /* ZF=1 | SF<>OF */
8516 if (op1 == const0_rtx)
8520 /* strcmp pattern do (use flags) and combine may ask us for proper
8529 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8532 ix86_use_fcomi_compare (code)
8533 enum rtx_code code ATTRIBUTE_UNUSED;
8535 enum rtx_code swapped_code = swap_condition (code);
8536 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8537 || (ix86_fp_comparison_cost (swapped_code)
8538 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8541 /* Swap, force into registers, or otherwise massage the two operands
8542 to a fp comparison. The operands are updated in place; the new
8543 comparison code is returned. */
8545 static enum rtx_code
8546 ix86_prepare_fp_compare_args (code, pop0, pop1)
8550 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8551 rtx op0 = *pop0, op1 = *pop1;
8552 enum machine_mode op_mode = GET_MODE (op0);
8553 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8555 /* All of the unordered compare instructions only work on registers.
8556 The same is true of the XFmode compare instructions. The same is
8557 true of the fcomi compare instructions. */
8560 && (fpcmp_mode == CCFPUmode
8561 || op_mode == XFmode
8562 || op_mode == TFmode
8563 || ix86_use_fcomi_compare (code)))
8565 op0 = force_reg (op_mode, op0);
8566 op1 = force_reg (op_mode, op1);
8570 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8571 things around if they appear profitable, otherwise force op0
8574 if (standard_80387_constant_p (op0) == 0
8575 || (GET_CODE (op0) == MEM
8576 && ! (standard_80387_constant_p (op1) == 0
8577 || GET_CODE (op1) == MEM)))
8580 tmp = op0, op0 = op1, op1 = tmp;
8581 code = swap_condition (code);
8584 if (GET_CODE (op0) != REG)
8585 op0 = force_reg (op_mode, op0);
8587 if (CONSTANT_P (op1))
8589 if (standard_80387_constant_p (op1))
8590 op1 = force_reg (op_mode, op1);
8592 op1 = validize_mem (force_const_mem (op_mode, op1));
8596 /* Try to rearrange the comparison to make it cheaper. */
8597 if (ix86_fp_comparison_cost (code)
8598 > ix86_fp_comparison_cost (swap_condition (code))
8599 && (GET_CODE (op1) == REG || !no_new_pseudos))
8602 tmp = op0, op0 = op1, op1 = tmp;
8603 code = swap_condition (code);
8604 if (GET_CODE (op0) != REG)
8605 op0 = force_reg (op_mode, op0);
8613 /* Convert comparison codes we use to represent FP comparison to integer
8614 code that will result in proper branch. Return UNKNOWN if no such code
8616 static enum rtx_code
8617 ix86_fp_compare_code_to_integer (code)
8647 /* Split comparison code CODE into comparisons we can do using branch
8648 instructions. BYPASS_CODE is comparison code for branch that will
8649 branch around FIRST_CODE and SECOND_CODE. If some of branches
8650 is not required, set value to NIL.
8651 We never require more than two branches. */
8653 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
8654 enum rtx_code code, *bypass_code, *first_code, *second_code;
8660 /* The fcomi comparison sets flags as follows:
8670 case GT: /* GTU - CF=0 & ZF=0 */
8671 case GE: /* GEU - CF=0 */
8672 case ORDERED: /* PF=0 */
8673 case UNORDERED: /* PF=1 */
8674 case UNEQ: /* EQ - ZF=1 */
8675 case UNLT: /* LTU - CF=1 */
8676 case UNLE: /* LEU - CF=1 | ZF=1 */
8677 case LTGT: /* EQ - ZF=0 */
8679 case LT: /* LTU - CF=1 - fails on unordered */
8681 *bypass_code = UNORDERED;
8683 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8685 *bypass_code = UNORDERED;
8687 case EQ: /* EQ - ZF=1 - fails on unordered */
8689 *bypass_code = UNORDERED;
8691 case NE: /* NE - ZF=0 - fails on unordered */
8693 *second_code = UNORDERED;
8695 case UNGE: /* GEU - CF=0 - fails on unordered */
8697 *second_code = UNORDERED;
8699 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8701 *second_code = UNORDERED;
8706 if (!TARGET_IEEE_FP)
8713 /* Return cost of comparison done fcom + arithmetics operations on AX.
8714 All following functions do use number of instructions as a cost metrics.
8715 In future this should be tweaked to compute bytes for optimize_size and
8716 take into account performance of various instructions on various CPUs. */
8718 ix86_fp_comparison_arithmetics_cost (code)
8721 if (!TARGET_IEEE_FP)
8723 /* The cost of code output by ix86_expand_fp_compare. */
8751 /* Return cost of comparison done using fcomi operation.
8752 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8754 ix86_fp_comparison_fcomi_cost (code)
8757 enum rtx_code bypass_code, first_code, second_code;
8758 /* Return arbitrarily high cost when instruction is not supported - this
8759 prevents gcc from using it. */
8762 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8763 return (bypass_code != NIL || second_code != NIL) + 2;
8766 /* Return cost of comparison done using sahf operation.
8767 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8769 ix86_fp_comparison_sahf_cost (code)
8772 enum rtx_code bypass_code, first_code, second_code;
8773 /* Return arbitrarily high cost when instruction is not preferred - this
8774 avoids gcc from using it. */
8775 if (!TARGET_USE_SAHF && !optimize_size)
8777 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8778 return (bypass_code != NIL || second_code != NIL) + 3;
8781 /* Compute cost of the comparison done using any method.
8782 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8784 ix86_fp_comparison_cost (code)
8787 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8790 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8791 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8793 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8794 if (min > sahf_cost)
8796 if (min > fcomi_cost)
8801 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8804 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
8806 rtx op0, op1, scratch;
8810 enum machine_mode fpcmp_mode, intcmp_mode;
8812 int cost = ix86_fp_comparison_cost (code);
8813 enum rtx_code bypass_code, first_code, second_code;
8815 fpcmp_mode = ix86_fp_compare_mode (code);
8816 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8819 *second_test = NULL_RTX;
8821 *bypass_test = NULL_RTX;
8823 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8825 /* Do fcomi/sahf based test when profitable. */
8826 if ((bypass_code == NIL || bypass_test)
8827 && (second_code == NIL || second_test)
8828 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8832 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8833 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8839 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8840 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8842 scratch = gen_reg_rtx (HImode);
8843 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8844 emit_insn (gen_x86_sahf_1 (scratch));
8847 /* The FP codes work out to act like unsigned. */
8848 intcmp_mode = fpcmp_mode;
8850 if (bypass_code != NIL)
8851 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8852 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8854 if (second_code != NIL)
8855 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8856 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8861 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8862 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8863 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8865 scratch = gen_reg_rtx (HImode);
8866 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8868 /* In the unordered case, we have to check C2 for NaN's, which
8869 doesn't happen to work out to anything nice combination-wise.
8870 So do some bit twiddling on the value we've got in AH to come
8871 up with an appropriate set of condition codes. */
8873 intcmp_mode = CCNOmode;
8878 if (code == GT || !TARGET_IEEE_FP)
8880 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8885 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8886 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8887 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8888 intcmp_mode = CCmode;
8894 if (code == LT && TARGET_IEEE_FP)
8896 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8897 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8898 intcmp_mode = CCmode;
8903 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8909 if (code == GE || !TARGET_IEEE_FP)
8911 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8916 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8917 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8924 if (code == LE && TARGET_IEEE_FP)
8926 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8927 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8928 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8929 intcmp_mode = CCmode;
8934 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8940 if (code == EQ && TARGET_IEEE_FP)
8942 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8943 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8944 intcmp_mode = CCmode;
8949 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8956 if (code == NE && TARGET_IEEE_FP)
8958 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8959 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8965 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8971 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8975 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8984 /* Return the test that should be put into the flags user, i.e.
8985 the bcc, scc, or cmov instruction. */
8986 return gen_rtx_fmt_ee (code, VOIDmode,
8987 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8992 ix86_expand_compare (code, second_test, bypass_test)
8994 rtx *second_test, *bypass_test;
8997 op0 = ix86_compare_op0;
8998 op1 = ix86_compare_op1;
9001 *second_test = NULL_RTX;
9003 *bypass_test = NULL_RTX;
9005 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9006 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9007 second_test, bypass_test);
9009 ret = ix86_expand_int_compare (code, op0, op1);
9014 /* Return true if the CODE will result in nontrivial jump sequence. */
9016 ix86_fp_jump_nontrivial_p (code)
9019 enum rtx_code bypass_code, first_code, second_code;
9022 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9023 return bypass_code != NIL || second_code != NIL;
9027 ix86_expand_branch (code, label)
9033 switch (GET_MODE (ix86_compare_op0))
9039 tmp = ix86_expand_compare (code, NULL, NULL);
9040 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9041 gen_rtx_LABEL_REF (VOIDmode, label),
9043 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9053 enum rtx_code bypass_code, first_code, second_code;
9055 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9058 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9060 /* Check whether we will use the natural sequence with one jump. If
9061 so, we can expand jump early. Otherwise delay expansion by
9062 creating compound insn to not confuse optimizers. */
9063 if (bypass_code == NIL && second_code == NIL
9066 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9067 gen_rtx_LABEL_REF (VOIDmode, label),
9072 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9073 ix86_compare_op0, ix86_compare_op1);
9074 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9075 gen_rtx_LABEL_REF (VOIDmode, label),
9077 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9079 use_fcomi = ix86_use_fcomi_compare (code);
9080 vec = rtvec_alloc (3 + !use_fcomi);
9081 RTVEC_ELT (vec, 0) = tmp;
9083 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9085 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9088 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9090 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9098 /* Expand DImode branch into multiple compare+branch. */
9100 rtx lo[2], hi[2], label2;
9101 enum rtx_code code1, code2, code3;
9103 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9105 tmp = ix86_compare_op0;
9106 ix86_compare_op0 = ix86_compare_op1;
9107 ix86_compare_op1 = tmp;
9108 code = swap_condition (code);
9110 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9111 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9113 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9114 avoid two branches. This costs one extra insn, so disable when
9115 optimizing for size. */
9117 if ((code == EQ || code == NE)
9119 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9124 if (hi[1] != const0_rtx)
9125 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9126 NULL_RTX, 0, OPTAB_WIDEN);
9129 if (lo[1] != const0_rtx)
9130 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9131 NULL_RTX, 0, OPTAB_WIDEN);
9133 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9134 NULL_RTX, 0, OPTAB_WIDEN);
9136 ix86_compare_op0 = tmp;
9137 ix86_compare_op1 = const0_rtx;
9138 ix86_expand_branch (code, label);
9142 /* Otherwise, if we are doing less-than or greater-or-equal-than,
9143 op1 is a constant and the low word is zero, then we can just
9144 examine the high word. */
9146 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9149 case LT: case LTU: case GE: case GEU:
9150 ix86_compare_op0 = hi[0];
9151 ix86_compare_op1 = hi[1];
9152 ix86_expand_branch (code, label);
9158 /* Otherwise, we need two or three jumps. */
9160 label2 = gen_label_rtx ();
9163 code2 = swap_condition (code);
9164 code3 = unsigned_condition (code);
9168 case LT: case GT: case LTU: case GTU:
9171 case LE: code1 = LT; code2 = GT; break;
9172 case GE: code1 = GT; code2 = LT; break;
9173 case LEU: code1 = LTU; code2 = GTU; break;
9174 case GEU: code1 = GTU; code2 = LTU; break;
9176 case EQ: code1 = NIL; code2 = NE; break;
9177 case NE: code2 = NIL; break;
9185 * if (hi(a) < hi(b)) goto true;
9186 * if (hi(a) > hi(b)) goto false;
9187 * if (lo(a) < lo(b)) goto true;
9191 ix86_compare_op0 = hi[0];
9192 ix86_compare_op1 = hi[1];
9195 ix86_expand_branch (code1, label);
9197 ix86_expand_branch (code2, label2);
9199 ix86_compare_op0 = lo[0];
9200 ix86_compare_op1 = lo[1];
9201 ix86_expand_branch (code3, label);
9204 emit_label (label2);
9213 /* Split branch based on floating point condition. */
9215 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
9217 rtx op1, op2, target1, target2, tmp;
9220 rtx label = NULL_RTX;
9222 int bypass_probability = -1, second_probability = -1, probability = -1;
9225 if (target2 != pc_rtx)
9228 code = reverse_condition_maybe_unordered (code);
9233 condition = ix86_expand_fp_compare (code, op1, op2,
9234 tmp, &second, &bypass);
9236 if (split_branch_probability >= 0)
9238 /* Distribute the probabilities across the jumps.
9239 Assume the BYPASS and SECOND to be always test
9241 probability = split_branch_probability;
9243 /* Value of 1 is low enough to make no need for probability
9244 to be updated. Later we may run some experiments and see
9245 if unordered values are more frequent in practice. */
9247 bypass_probability = 1;
9249 second_probability = 1;
9251 if (bypass != NULL_RTX)
9253 label = gen_label_rtx ();
9254 i = emit_jump_insn (gen_rtx_SET
9256 gen_rtx_IF_THEN_ELSE (VOIDmode,
9258 gen_rtx_LABEL_REF (VOIDmode,
9261 if (bypass_probability >= 0)
9263 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9264 GEN_INT (bypass_probability),
9267 i = emit_jump_insn (gen_rtx_SET
9269 gen_rtx_IF_THEN_ELSE (VOIDmode,
9270 condition, target1, target2)));
9271 if (probability >= 0)
9273 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9274 GEN_INT (probability),
9276 if (second != NULL_RTX)
9278 i = emit_jump_insn (gen_rtx_SET
9280 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9282 if (second_probability >= 0)
9284 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9285 GEN_INT (second_probability),
9288 if (label != NULL_RTX)
9293 ix86_expand_setcc (code, dest)
9297 rtx ret, tmp, tmpreg;
9298 rtx second_test, bypass_test;
9300 if (GET_MODE (ix86_compare_op0) == DImode
9302 return 0; /* FAIL */
9304 if (GET_MODE (dest) != QImode)
9307 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9308 PUT_MODE (ret, QImode);
9313 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9314 if (bypass_test || second_test)
9316 rtx test = second_test;
9318 rtx tmp2 = gen_reg_rtx (QImode);
9325 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9327 PUT_MODE (test, QImode);
9328 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9331 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9333 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9336 return 1; /* DONE */
9339 /* Expand comparison setting or clearing carry flag. Return true when successful
9340 and set pop for the operation. */
9342 ix86_expand_carry_flag_compare (code, op0, op1, pop)
9346 enum machine_mode mode =
9347 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9349 /* Do not handle DImode compares that go trought special path. Also we can't
9350 deal with FP compares yet. This is possible to add. */
9351 if ((mode == DImode && !TARGET_64BIT))
9353 if (FLOAT_MODE_P (mode))
9355 rtx second_test = NULL, bypass_test = NULL;
9356 rtx compare_op, compare_seq;
9358 /* Shortcut: following common codes never translate into carry flag compares. */
9359 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9360 || code == ORDERED || code == UNORDERED)
9363 /* These comparisons require zero flag; swap operands so they won't. */
9364 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9370 code = swap_condition (code);
9373 /* Try to expand the comparsion and verify that we end up with carry flag
9374 based comparsion. This is fails to be true only when we decide to expand
9375 comparsion using arithmetic that is not too common scenario. */
9377 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9378 &second_test, &bypass_test);
9379 compare_seq = get_insns ();
9382 if (second_test || bypass_test)
9384 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9385 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9386 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9388 code = GET_CODE (compare_op);
9389 if (code != LTU && code != GEU)
9391 emit_insn (compare_seq);
9395 if (!INTEGRAL_MODE_P (mode))
9403 /* Convert a==0 into (unsigned)a<1. */
9406 if (op1 != const0_rtx)
9409 code = (code == EQ ? LTU : GEU);
9412 /* Convert a>b into b<a or a>=b-1. */
9415 if (GET_CODE (op1) == CONST_INT)
9417 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9418 /* Bail out on overflow. We still can swap operands but that
9419 would force loading of the constant into register. */
9420 if (op1 == const0_rtx
9421 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9423 code = (code == GTU ? GEU : LTU);
9430 code = (code == GTU ? LTU : GEU);
9434 /* Convert a>0 into (unsigned)a<0x7fffffff. */
9437 if (mode == DImode || op1 != const0_rtx)
9439 op1 = gen_int_mode (~(1 << (GET_MODE_BITSIZE (mode) - 1)), mode);
9440 code = (code == LT ? GEU : LTU);
9444 if (mode == DImode || op1 != constm1_rtx)
9446 op1 = gen_int_mode (~(1 << (GET_MODE_BITSIZE (mode) - 1)), mode);
9447 code = (code == LE ? GEU : LTU);
9453 ix86_compare_op0 = op0;
9454 ix86_compare_op1 = op1;
9455 *pop = ix86_expand_compare (code, NULL, NULL);
9456 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9462 ix86_expand_int_movcc (operands)
9465 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9466 rtx compare_seq, compare_op;
9467 rtx second_test, bypass_test;
9468 enum machine_mode mode = GET_MODE (operands[0]);
9469 bool sign_bit_compare_p = false;;
9472 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9473 compare_seq = get_insns ();
9476 compare_code = GET_CODE (compare_op);
9478 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9479 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9480 sign_bit_compare_p = true;
9482 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9483 HImode insns, we'd be swallowed in word prefix ops. */
9485 if ((mode != HImode || TARGET_FAST_PREFIX)
9486 && (mode != DImode || TARGET_64BIT)
9487 && GET_CODE (operands[2]) == CONST_INT
9488 && GET_CODE (operands[3]) == CONST_INT)
9490 rtx out = operands[0];
9491 HOST_WIDE_INT ct = INTVAL (operands[2]);
9492 HOST_WIDE_INT cf = INTVAL (operands[3]);
9496 /* Sign bit compares are better done using shifts than we do by using
9498 if (sign_bit_compare_p
9499 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9500 ix86_compare_op1, &compare_op))
9502 /* Detect overlap between destination and compare sources. */
9505 if (!sign_bit_compare_p)
9509 compare_code = GET_CODE (compare_op);
9511 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9512 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9515 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9518 /* To simplify rest of code, restrict to the GEU case. */
9519 if (compare_code == LTU)
9521 HOST_WIDE_INT tmp = ct;
9524 compare_code = reverse_condition (compare_code);
9525 code = reverse_condition (code);
9530 PUT_CODE (compare_op,
9531 reverse_condition_maybe_unordered
9532 (GET_CODE (compare_op)));
9534 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9538 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9539 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9540 tmp = gen_reg_rtx (mode);
9543 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9545 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9549 if (code == GT || code == GE)
9550 code = reverse_condition (code);
9553 HOST_WIDE_INT tmp = ct;
9558 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9559 ix86_compare_op1, VOIDmode, 0, -1);
9572 tmp = expand_simple_binop (mode, PLUS,
9574 copy_rtx (tmp), 1, OPTAB_DIRECT);
9585 tmp = expand_simple_binop (mode, IOR,
9587 copy_rtx (tmp), 1, OPTAB_DIRECT);
9589 else if (diff == -1 && ct)
9599 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9601 tmp = expand_simple_binop (mode, PLUS,
9602 copy_rtx (tmp), GEN_INT (cf),
9603 copy_rtx (tmp), 1, OPTAB_DIRECT);
9611 * andl cf - ct, dest
9621 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9624 tmp = expand_simple_binop (mode, AND,
9626 gen_int_mode (cf - ct, mode),
9627 copy_rtx (tmp), 1, OPTAB_DIRECT);
9629 tmp = expand_simple_binop (mode, PLUS,
9630 copy_rtx (tmp), GEN_INT (ct),
9631 copy_rtx (tmp), 1, OPTAB_DIRECT);
9634 if (!rtx_equal_p (tmp, out))
9635 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9637 return 1; /* DONE */
9643 tmp = ct, ct = cf, cf = tmp;
9645 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9647 /* We may be reversing unordered compare to normal compare, that
9648 is not valid in general (we may convert non-trapping condition
9649 to trapping one), however on i386 we currently emit all
9650 comparisons unordered. */
9651 compare_code = reverse_condition_maybe_unordered (compare_code);
9652 code = reverse_condition_maybe_unordered (code);
9656 compare_code = reverse_condition (compare_code);
9657 code = reverse_condition (code);
9662 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9663 && GET_CODE (ix86_compare_op1) == CONST_INT)
9665 if (ix86_compare_op1 == const0_rtx
9666 && (code == LT || code == GE))
9667 compare_code = code;
9668 else if (ix86_compare_op1 == constm1_rtx)
9672 else if (code == GT)
9677 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9678 if (compare_code != NIL
9679 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9680 && (cf == -1 || ct == -1))
9682 /* If lea code below could be used, only optimize
9683 if it results in a 2 insn sequence. */
9685 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9686 || diff == 3 || diff == 5 || diff == 9)
9687 || (compare_code == LT && ct == -1)
9688 || (compare_code == GE && cf == -1))
9691 * notl op1 (if necessary)
9699 code = reverse_condition (code);
9702 out = emit_store_flag (out, code, ix86_compare_op0,
9703 ix86_compare_op1, VOIDmode, 0, -1);
9705 out = expand_simple_binop (mode, IOR,
9707 out, 1, OPTAB_DIRECT);
9708 if (out != operands[0])
9709 emit_move_insn (operands[0], out);
9711 return 1; /* DONE */
9716 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9717 || diff == 3 || diff == 5 || diff == 9)
9718 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9719 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
9725 * lea cf(dest*(ct-cf)),dest
9729 * This also catches the degenerate setcc-only case.
9735 out = emit_store_flag (out, code, ix86_compare_op0,
9736 ix86_compare_op1, VOIDmode, 0, 1);
9739 /* On x86_64 the lea instruction operates on Pmode, so we need
9740 to get arithmetics done in proper mode to match. */
9742 tmp = copy_rtx (out);
9746 out1 = copy_rtx (out);
9747 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9751 tmp = gen_rtx_PLUS (mode, tmp, out1);
9757 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9760 if (!rtx_equal_p (tmp, out))
9763 out = force_operand (tmp, copy_rtx (out));
9765 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
9767 if (!rtx_equal_p (out, operands[0]))
9768 emit_move_insn (operands[0], copy_rtx (out));
9770 return 1; /* DONE */
9774 * General case: Jumpful:
9775 * xorl dest,dest cmpl op1, op2
9776 * cmpl op1, op2 movl ct, dest
9778 * decl dest movl cf, dest
9779 * andl (cf-ct),dest 1:
9784 * This is reasonably steep, but branch mispredict costs are
9785 * high on modern cpus, so consider failing only if optimizing
9789 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9790 && BRANCH_COST >= 2)
9796 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9797 /* We may be reversing unordered compare to normal compare,
9798 that is not valid in general (we may convert non-trapping
9799 condition to trapping one), however on i386 we currently
9800 emit all comparisons unordered. */
9801 code = reverse_condition_maybe_unordered (code);
9804 code = reverse_condition (code);
9805 if (compare_code != NIL)
9806 compare_code = reverse_condition (compare_code);
9810 if (compare_code != NIL)
9812 /* notl op1 (if needed)
9817 For x < 0 (resp. x <= -1) there will be no notl,
9818 so if possible swap the constants to get rid of the
9820 True/false will be -1/0 while code below (store flag
9821 followed by decrement) is 0/-1, so the constants need
9822 to be exchanged once more. */
9824 if (compare_code == GE || !cf)
9826 code = reverse_condition (code);
9831 HOST_WIDE_INT tmp = cf;
9836 out = emit_store_flag (out, code, ix86_compare_op0,
9837 ix86_compare_op1, VOIDmode, 0, -1);
9841 out = emit_store_flag (out, code, ix86_compare_op0,
9842 ix86_compare_op1, VOIDmode, 0, 1);
9844 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
9845 copy_rtx (out), 1, OPTAB_DIRECT);
9848 out = expand_simple_binop (mode, AND, copy_rtx (out),
9849 gen_int_mode (cf - ct, mode),
9850 copy_rtx (out), 1, OPTAB_DIRECT);
9852 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
9853 copy_rtx (out), 1, OPTAB_DIRECT);
9854 if (!rtx_equal_p (out, operands[0]))
9855 emit_move_insn (operands[0], copy_rtx (out));
9857 return 1; /* DONE */
9861 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9863 /* Try a few things more with specific constants and a variable. */
9866 rtx var, orig_out, out, tmp;
9868 if (BRANCH_COST <= 2)
9869 return 0; /* FAIL */
9871 /* If one of the two operands is an interesting constant, load a
9872 constant with the above and mask it in with a logical operation. */
9874 if (GET_CODE (operands[2]) == CONST_INT)
9877 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
9878 operands[3] = constm1_rtx, op = and_optab;
9879 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
9880 operands[3] = const0_rtx, op = ior_optab;
9882 return 0; /* FAIL */
9884 else if (GET_CODE (operands[3]) == CONST_INT)
9887 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
9888 operands[2] = constm1_rtx, op = and_optab;
9889 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
9890 operands[2] = const0_rtx, op = ior_optab;
9892 return 0; /* FAIL */
9895 return 0; /* FAIL */
9897 orig_out = operands[0];
9898 tmp = gen_reg_rtx (mode);
9901 /* Recurse to get the constant loaded. */
9902 if (ix86_expand_int_movcc (operands) == 0)
9903 return 0; /* FAIL */
9905 /* Mask in the interesting variable. */
9906 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9908 if (!rtx_equal_p (out, orig_out))
9909 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
9911 return 1; /* DONE */
9915 * For comparison with above,
9925 if (! nonimmediate_operand (operands[2], mode))
9926 operands[2] = force_reg (mode, operands[2]);
9927 if (! nonimmediate_operand (operands[3], mode))
9928 operands[3] = force_reg (mode, operands[3]);
9930 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9932 rtx tmp = gen_reg_rtx (mode);
9933 emit_move_insn (tmp, operands[3]);
9936 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9938 rtx tmp = gen_reg_rtx (mode);
9939 emit_move_insn (tmp, operands[2]);
9943 if (! register_operand (operands[2], VOIDmode)
9945 || ! register_operand (operands[3], VOIDmode)))
9946 operands[2] = force_reg (mode, operands[2]);
9949 && ! register_operand (operands[3], VOIDmode))
9950 operands[3] = force_reg (mode, operands[3]);
9952 emit_insn (compare_seq);
9953 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9954 gen_rtx_IF_THEN_ELSE (mode,
9955 compare_op, operands[2],
9958 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9959 gen_rtx_IF_THEN_ELSE (mode,
9961 copy_rtx (operands[3]),
9962 copy_rtx (operands[0]))));
9964 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9965 gen_rtx_IF_THEN_ELSE (mode,
9967 copy_rtx (operands[2]),
9968 copy_rtx (operands[0]))));
9970 return 1; /* DONE */
9974 ix86_expand_fp_movcc (operands)
9979 rtx compare_op, second_test, bypass_test;
9981 /* For SF/DFmode conditional moves based on comparisons
9982 in same mode, we may want to use SSE min/max instructions. */
9983 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9984 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
9985 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
9986 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
9988 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
9989 /* We may be called from the post-reload splitter. */
9990 && (!REG_P (operands[0])
9991 || SSE_REG_P (operands[0])
9992 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
9994 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9995 code = GET_CODE (operands[1]);
9997 /* See if we have (cross) match between comparison operands and
9998 conditional move operands. */
9999 if (rtx_equal_p (operands[2], op1))
10004 code = reverse_condition_maybe_unordered (code);
10006 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
10008 /* Check for min operation. */
10009 if (code == LT || code == UNLE)
10017 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10018 if (memory_operand (op0, VOIDmode))
10019 op0 = force_reg (GET_MODE (operands[0]), op0);
10020 if (GET_MODE (operands[0]) == SFmode)
10021 emit_insn (gen_minsf3 (operands[0], op0, op1));
10023 emit_insn (gen_mindf3 (operands[0], op0, op1));
10026 /* Check for max operation. */
10027 if (code == GT || code == UNGE)
10035 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10036 if (memory_operand (op0, VOIDmode))
10037 op0 = force_reg (GET_MODE (operands[0]), op0);
10038 if (GET_MODE (operands[0]) == SFmode)
10039 emit_insn (gen_maxsf3 (operands[0], op0, op1));
10041 emit_insn (gen_maxdf3 (operands[0], op0, op1));
10045 /* Manage condition to be sse_comparison_operator. In case we are
10046 in non-ieee mode, try to canonicalize the destination operand
10047 to be first in the comparison - this helps reload to avoid extra
10049 if (!sse_comparison_operator (operands[1], VOIDmode)
10050 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
10052 rtx tmp = ix86_compare_op0;
10053 ix86_compare_op0 = ix86_compare_op1;
10054 ix86_compare_op1 = tmp;
10055 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
10056 VOIDmode, ix86_compare_op0,
10059 /* Similarly try to manage result to be first operand of conditional
10060 move. We also don't support the NE comparison on SSE, so try to
10062 if ((rtx_equal_p (operands[0], operands[3])
10063 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
10064 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
10066 rtx tmp = operands[2];
10067 operands[2] = operands[3];
10069 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10070 (GET_CODE (operands[1])),
10071 VOIDmode, ix86_compare_op0,
10074 if (GET_MODE (operands[0]) == SFmode)
10075 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
10076 operands[2], operands[3],
10077 ix86_compare_op0, ix86_compare_op1));
10079 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
10080 operands[2], operands[3],
10081 ix86_compare_op0, ix86_compare_op1));
10085 /* The floating point conditional move instructions don't directly
10086 support conditions resulting from a signed integer comparison. */
10088 code = GET_CODE (operands[1]);
10089 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10091 /* The floating point conditional move instructions don't directly
10092 support signed integer comparisons. */
10094 if (!fcmov_comparison_operator (compare_op, VOIDmode))
10096 if (second_test != NULL || bypass_test != NULL)
10098 tmp = gen_reg_rtx (QImode);
10099 ix86_expand_setcc (code, tmp);
10101 ix86_compare_op0 = tmp;
10102 ix86_compare_op1 = const0_rtx;
10103 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10105 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10107 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10108 emit_move_insn (tmp, operands[3]);
10111 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10113 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10114 emit_move_insn (tmp, operands[2]);
10118 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10119 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10124 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10125 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10130 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10131 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10139 /* Expand conditional increment or decrement using adb/sbb instructions.
10140 The default case using setcc followed by the conditional move can be
10141 done by generic code. */
10143 ix86_expand_int_addcc (operands)
10146 enum rtx_code code = GET_CODE (operands[1]);
10148 rtx val = const0_rtx;
10149 bool fpcmp = false;
10150 enum machine_mode mode = GET_MODE (operands[0]);
10152 if (operands[3] != const1_rtx
10153 && operands[3] != constm1_rtx)
10155 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10156 ix86_compare_op1, &compare_op))
10158 code = GET_CODE (compare_op);
10160 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10161 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10164 code = ix86_fp_compare_code_to_integer (code);
10171 PUT_CODE (compare_op,
10172 reverse_condition_maybe_unordered
10173 (GET_CODE (compare_op)));
10175 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10177 PUT_MODE (compare_op, mode);
10179 /* Construct either adc or sbb insn. */
10180 if ((code == LTU) == (operands[3] == constm1_rtx))
10182 switch (GET_MODE (operands[0]))
10185 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10188 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10191 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10194 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10202 switch (GET_MODE (operands[0]))
10205 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10208 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10211 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10214 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10220 return 1; /* DONE */
10224 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10225 works for floating pointer parameters and nonoffsetable memories.
10226 For pushes, it returns just stack offsets; the values will be saved
10227 in the right order. Maximally three parts are generated. */
10230 ix86_split_to_parts (operand, parts, mode)
10233 enum machine_mode mode;
10238 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
10240 size = (GET_MODE_SIZE (mode) + 4) / 8;
10242 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10244 if (size < 2 || size > 3)
10247 /* Optimize constant pool reference to immediates. This is used by fp
10248 moves, that force all constants to memory to allow combining. */
10249 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10251 rtx tmp = maybe_get_pool_constant (operand);
10256 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10258 /* The only non-offsetable memories we handle are pushes. */
10259 if (! push_operand (operand, VOIDmode))
10262 operand = copy_rtx (operand);
10263 PUT_MODE (operand, Pmode);
10264 parts[0] = parts[1] = parts[2] = operand;
10266 else if (!TARGET_64BIT)
10268 if (mode == DImode)
10269 split_di (&operand, 1, &parts[0], &parts[1]);
10272 if (REG_P (operand))
10274 if (!reload_completed)
10276 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10277 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10279 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10281 else if (offsettable_memref_p (operand))
10283 operand = adjust_address (operand, SImode, 0);
10284 parts[0] = operand;
10285 parts[1] = adjust_address (operand, SImode, 4);
10287 parts[2] = adjust_address (operand, SImode, 8);
10289 else if (GET_CODE (operand) == CONST_DOUBLE)
10294 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10299 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10300 parts[2] = gen_int_mode (l[2], SImode);
10303 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10308 parts[1] = gen_int_mode (l[1], SImode);
10309 parts[0] = gen_int_mode (l[0], SImode);
10317 if (mode == TImode)
10318 split_ti (&operand, 1, &parts[0], &parts[1]);
10319 if (mode == XFmode || mode == TFmode)
10321 if (REG_P (operand))
10323 if (!reload_completed)
10325 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10326 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10328 else if (offsettable_memref_p (operand))
10330 operand = adjust_address (operand, DImode, 0);
10331 parts[0] = operand;
10332 parts[1] = adjust_address (operand, SImode, 8);
10334 else if (GET_CODE (operand) == CONST_DOUBLE)
10339 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10340 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10341 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10342 if (HOST_BITS_PER_WIDE_INT >= 64)
10345 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10346 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10349 parts[0] = immed_double_const (l[0], l[1], DImode);
10350 parts[1] = gen_int_mode (l[2], SImode);
10360 /* Emit insns to perform a move or push of DI, DF, and XF values.
10361 Return false when normal moves are needed; true when all required
10362 insns have been emitted. Operands 2-4 contain the input values
10363 int the correct order; operands 5-7 contain the output values. */
10366 ix86_split_long_move (operands)
10372 int collisions = 0;
10373 enum machine_mode mode = GET_MODE (operands[0]);
10375 /* The DFmode expanders may ask us to move double.
10376 For 64bit target this is single move. By hiding the fact
10377 here we simplify i386.md splitters. */
10378 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10380 /* Optimize constant pool reference to immediates. This is used by
10381 fp moves, that force all constants to memory to allow combining. */
10383 if (GET_CODE (operands[1]) == MEM
10384 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10385 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10386 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10387 if (push_operand (operands[0], VOIDmode))
10389 operands[0] = copy_rtx (operands[0]);
10390 PUT_MODE (operands[0], Pmode);
10393 operands[0] = gen_lowpart (DImode, operands[0]);
10394 operands[1] = gen_lowpart (DImode, operands[1]);
10395 emit_move_insn (operands[0], operands[1]);
10399 /* The only non-offsettable memory we handle is push. */
10400 if (push_operand (operands[0], VOIDmode))
10402 else if (GET_CODE (operands[0]) == MEM
10403 && ! offsettable_memref_p (operands[0]))
10406 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10407 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10409 /* When emitting push, take care for source operands on the stack. */
10410 if (push && GET_CODE (operands[1]) == MEM
10411 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10414 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10415 XEXP (part[1][2], 0));
10416 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10417 XEXP (part[1][1], 0));
10420 /* We need to do copy in the right order in case an address register
10421 of the source overlaps the destination. */
10422 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10424 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10426 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10429 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10432 /* Collision in the middle part can be handled by reordering. */
10433 if (collisions == 1 && nparts == 3
10434 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10437 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10438 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10441 /* If there are more collisions, we can't handle it by reordering.
10442 Do an lea to the last part and use only one colliding move. */
10443 else if (collisions > 1)
10446 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
10447 XEXP (part[1][0], 0)));
10448 part[1][0] = change_address (part[1][0],
10449 TARGET_64BIT ? DImode : SImode,
10450 part[0][nparts - 1]);
10451 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
10453 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
10463 /* We use only first 12 bytes of TFmode value, but for pushing we
10464 are required to adjust stack as if we were pushing real 16byte
10466 if (mode == TFmode && !TARGET_64BIT)
10467 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
10469 emit_move_insn (part[0][2], part[1][2]);
10474 /* In 64bit mode we don't have 32bit push available. In case this is
10475 register, it is OK - we will just use larger counterpart. We also
10476 retype memory - these comes from attempt to avoid REX prefix on
10477 moving of second half of TFmode value. */
10478 if (GET_MODE (part[1][1]) == SImode)
10480 if (GET_CODE (part[1][1]) == MEM)
10481 part[1][1] = adjust_address (part[1][1], DImode, 0);
10482 else if (REG_P (part[1][1]))
10483 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10486 if (GET_MODE (part[1][0]) == SImode)
10487 part[1][0] = part[1][1];
10490 emit_move_insn (part[0][1], part[1][1]);
10491 emit_move_insn (part[0][0], part[1][0]);
10495 /* Choose correct order to not overwrite the source before it is copied. */
10496 if ((REG_P (part[0][0])
10497 && REG_P (part[1][1])
10498 && (REGNO (part[0][0]) == REGNO (part[1][1])
10500 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10502 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10506 operands[2] = part[0][2];
10507 operands[3] = part[0][1];
10508 operands[4] = part[0][0];
10509 operands[5] = part[1][2];
10510 operands[6] = part[1][1];
10511 operands[7] = part[1][0];
10515 operands[2] = part[0][1];
10516 operands[3] = part[0][0];
10517 operands[5] = part[1][1];
10518 operands[6] = part[1][0];
10525 operands[2] = part[0][0];
10526 operands[3] = part[0][1];
10527 operands[4] = part[0][2];
10528 operands[5] = part[1][0];
10529 operands[6] = part[1][1];
10530 operands[7] = part[1][2];
10534 operands[2] = part[0][0];
10535 operands[3] = part[0][1];
10536 operands[5] = part[1][0];
10537 operands[6] = part[1][1];
10540 emit_move_insn (operands[2], operands[5]);
10541 emit_move_insn (operands[3], operands[6]);
10543 emit_move_insn (operands[4], operands[7]);
10549 ix86_split_ashldi (operands, scratch)
10550 rtx *operands, scratch;
10552 rtx low[2], high[2];
10555 if (GET_CODE (operands[2]) == CONST_INT)
10557 split_di (operands, 2, low, high);
10558 count = INTVAL (operands[2]) & 63;
10562 emit_move_insn (high[0], low[1]);
10563 emit_move_insn (low[0], const0_rtx);
10566 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10570 if (!rtx_equal_p (operands[0], operands[1]))
10571 emit_move_insn (operands[0], operands[1]);
10572 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10573 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10578 if (!rtx_equal_p (operands[0], operands[1]))
10579 emit_move_insn (operands[0], operands[1]);
10581 split_di (operands, 1, low, high);
10583 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10584 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10586 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10588 if (! no_new_pseudos)
10589 scratch = force_reg (SImode, const0_rtx);
10591 emit_move_insn (scratch, const0_rtx);
10593 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10597 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10602 ix86_split_ashrdi (operands, scratch)
10603 rtx *operands, scratch;
10605 rtx low[2], high[2];
10608 if (GET_CODE (operands[2]) == CONST_INT)
10610 split_di (operands, 2, low, high);
10611 count = INTVAL (operands[2]) & 63;
10615 emit_move_insn (low[0], high[1]);
10617 if (! reload_completed)
10618 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10621 emit_move_insn (high[0], low[0]);
10622 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10626 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10630 if (!rtx_equal_p (operands[0], operands[1]))
10631 emit_move_insn (operands[0], operands[1]);
10632 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10633 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10638 if (!rtx_equal_p (operands[0], operands[1]))
10639 emit_move_insn (operands[0], operands[1]);
10641 split_di (operands, 1, low, high);
10643 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10644 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10646 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10648 if (! no_new_pseudos)
10649 scratch = gen_reg_rtx (SImode);
10650 emit_move_insn (scratch, high[0]);
10651 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10652 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10656 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10661 ix86_split_lshrdi (operands, scratch)
10662 rtx *operands, scratch;
10664 rtx low[2], high[2];
10667 if (GET_CODE (operands[2]) == CONST_INT)
10669 split_di (operands, 2, low, high);
10670 count = INTVAL (operands[2]) & 63;
10674 emit_move_insn (low[0], high[1]);
10675 emit_move_insn (high[0], const0_rtx);
10678 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10682 if (!rtx_equal_p (operands[0], operands[1]))
10683 emit_move_insn (operands[0], operands[1]);
10684 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10685 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10690 if (!rtx_equal_p (operands[0], operands[1]))
10691 emit_move_insn (operands[0], operands[1]);
10693 split_di (operands, 1, low, high);
10695 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10696 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10698 /* Heh. By reversing the arguments, we can reuse this pattern. */
10699 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10701 if (! no_new_pseudos)
10702 scratch = force_reg (SImode, const0_rtx);
10704 emit_move_insn (scratch, const0_rtx);
10706 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10710 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10714 /* Helper function for the string operations below. Dest VARIABLE whether
10715 it is aligned to VALUE bytes. If true, jump to the label. */
10717 ix86_expand_aligntest (variable, value)
10721 rtx label = gen_label_rtx ();
10722 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10723 if (GET_MODE (variable) == DImode)
10724 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10726 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10727 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10732 /* Adjust COUNTER by the VALUE. */
10734 ix86_adjust_counter (countreg, value)
10736 HOST_WIDE_INT value;
10738 if (GET_MODE (countreg) == DImode)
10739 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10741 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10744 /* Zero extend possibly SImode EXP to Pmode register. */
10746 ix86_zero_extend_to_Pmode (exp)
10750 if (GET_MODE (exp) == VOIDmode)
10751 return force_reg (Pmode, exp);
10752 if (GET_MODE (exp) == Pmode)
10753 return copy_to_mode_reg (Pmode, exp);
10754 r = gen_reg_rtx (Pmode);
10755 emit_insn (gen_zero_extendsidi2 (r, exp));
10759 /* Expand string move (memcpy) operation. Use i386 string operations when
10760 profitable. expand_clrstr contains similar code. */
10762 ix86_expand_movstr (dst, src, count_exp, align_exp)
10763 rtx dst, src, count_exp, align_exp;
10765 rtx srcreg, destreg, countreg;
10766 enum machine_mode counter_mode;
10767 HOST_WIDE_INT align = 0;
10768 unsigned HOST_WIDE_INT count = 0;
10771 if (GET_CODE (align_exp) == CONST_INT)
10772 align = INTVAL (align_exp);
10774 /* Can't use any of this if the user has appropriated esi or edi. */
10775 if (global_regs[4] || global_regs[5])
10778 /* This simple hack avoids all inlining code and simplifies code below. */
10779 if (!TARGET_ALIGN_STRINGOPS)
10782 if (GET_CODE (count_exp) == CONST_INT)
10784 count = INTVAL (count_exp);
10785 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
10789 /* Figure out proper mode for counter. For 32bits it is always SImode,
10790 for 64bits use SImode when possible, otherwise DImode.
10791 Set count to number of bytes copied when known at compile time. */
10792 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10793 || x86_64_zero_extended_value (count_exp))
10794 counter_mode = SImode;
10796 counter_mode = DImode;
10800 if (counter_mode != SImode && counter_mode != DImode)
10803 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10804 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10806 emit_insn (gen_cld ());
10808 /* When optimizing for size emit simple rep ; movsb instruction for
10809 counts not divisible by 4. */
10811 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10813 countreg = ix86_zero_extend_to_Pmode (count_exp);
10815 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
10816 destreg, srcreg, countreg));
10818 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
10819 destreg, srcreg, countreg));
10822 /* For constant aligned (or small unaligned) copies use rep movsl
10823 followed by code copying the rest. For PentiumPro ensure 8 byte
10824 alignment to allow rep movsl acceleration. */
10826 else if (count != 0
10828 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10829 || optimize_size || count < (unsigned int) 64))
10831 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10832 if (count & ~(size - 1))
10834 countreg = copy_to_mode_reg (counter_mode,
10835 GEN_INT ((count >> (size == 4 ? 2 : 3))
10836 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10837 countreg = ix86_zero_extend_to_Pmode (countreg);
10841 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
10842 destreg, srcreg, countreg));
10844 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
10845 destreg, srcreg, countreg));
10848 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
10849 destreg, srcreg, countreg));
10851 if (size == 8 && (count & 0x04))
10852 emit_insn (gen_strmovsi (destreg, srcreg));
10854 emit_insn (gen_strmovhi (destreg, srcreg));
10856 emit_insn (gen_strmovqi (destreg, srcreg));
10858 /* The generic code based on the glibc implementation:
10859 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10860 allowing accelerated copying there)
10861 - copy the data using rep movsl
10862 - copy the rest. */
10867 int desired_alignment = (TARGET_PENTIUMPRO
10868 && (count == 0 || count >= (unsigned int) 260)
10869 ? 8 : UNITS_PER_WORD);
10871 /* In case we don't know anything about the alignment, default to
10872 library version, since it is usually equally fast and result in
10875 Also emit call when we know that the count is large and call overhead
10876 will not be important. */
10877 if (!TARGET_INLINE_ALL_STRINGOPS
10878 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
10884 if (TARGET_SINGLE_STRINGOP)
10885 emit_insn (gen_cld ());
10887 countreg2 = gen_reg_rtx (Pmode);
10888 countreg = copy_to_mode_reg (counter_mode, count_exp);
10890 /* We don't use loops to align destination and to copy parts smaller
10891 than 4 bytes, because gcc is able to optimize such code better (in
10892 the case the destination or the count really is aligned, gcc is often
10893 able to predict the branches) and also it is friendlier to the
10894 hardware branch prediction.
10896 Using loops is beneficial for generic case, because we can
10897 handle small counts using the loops. Many CPUs (such as Athlon)
10898 have large REP prefix setup costs.
10900 This is quite costly. Maybe we can revisit this decision later or
10901 add some customizability to this code. */
10903 if (count == 0 && align < desired_alignment)
10905 label = gen_label_rtx ();
10906 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10907 LEU, 0, counter_mode, 1, label);
10911 rtx label = ix86_expand_aligntest (destreg, 1);
10912 emit_insn (gen_strmovqi (destreg, srcreg));
10913 ix86_adjust_counter (countreg, 1);
10914 emit_label (label);
10915 LABEL_NUSES (label) = 1;
10919 rtx label = ix86_expand_aligntest (destreg, 2);
10920 emit_insn (gen_strmovhi (destreg, srcreg));
10921 ix86_adjust_counter (countreg, 2);
10922 emit_label (label);
10923 LABEL_NUSES (label) = 1;
10925 if (align <= 4 && desired_alignment > 4)
10927 rtx label = ix86_expand_aligntest (destreg, 4);
10928 emit_insn (gen_strmovsi (destreg, srcreg));
10929 ix86_adjust_counter (countreg, 4);
10930 emit_label (label);
10931 LABEL_NUSES (label) = 1;
10934 if (label && desired_alignment > 4 && !TARGET_64BIT)
10936 emit_label (label);
10937 LABEL_NUSES (label) = 1;
10940 if (!TARGET_SINGLE_STRINGOP)
10941 emit_insn (gen_cld ());
10944 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10946 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
10947 destreg, srcreg, countreg2));
10951 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10952 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
10953 destreg, srcreg, countreg2));
10958 emit_label (label);
10959 LABEL_NUSES (label) = 1;
10961 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10962 emit_insn (gen_strmovsi (destreg, srcreg));
10963 if ((align <= 4 || count == 0) && TARGET_64BIT)
10965 rtx label = ix86_expand_aligntest (countreg, 4);
10966 emit_insn (gen_strmovsi (destreg, srcreg));
10967 emit_label (label);
10968 LABEL_NUSES (label) = 1;
10970 if (align > 2 && count != 0 && (count & 2))
10971 emit_insn (gen_strmovhi (destreg, srcreg));
10972 if (align <= 2 || count == 0)
10974 rtx label = ix86_expand_aligntest (countreg, 2);
10975 emit_insn (gen_strmovhi (destreg, srcreg));
10976 emit_label (label);
10977 LABEL_NUSES (label) = 1;
10979 if (align > 1 && count != 0 && (count & 1))
10980 emit_insn (gen_strmovqi (destreg, srcreg));
10981 if (align <= 1 || count == 0)
10983 rtx label = ix86_expand_aligntest (countreg, 1);
10984 emit_insn (gen_strmovqi (destreg, srcreg));
10985 emit_label (label);
10986 LABEL_NUSES (label) = 1;
10990 insns = get_insns ();
10993 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
10998 /* Expand string clear operation (bzero). Use i386 string operations when
10999 profitable. expand_movstr contains similar code. */
11001 ix86_expand_clrstr (src, count_exp, align_exp)
11002 rtx src, count_exp, align_exp;
11004 rtx destreg, zeroreg, countreg;
11005 enum machine_mode counter_mode;
11006 HOST_WIDE_INT align = 0;
11007 unsigned HOST_WIDE_INT count = 0;
11009 if (GET_CODE (align_exp) == CONST_INT)
11010 align = INTVAL (align_exp);
11012 /* Can't use any of this if the user has appropriated esi. */
11013 if (global_regs[4])
11016 /* This simple hack avoids all inlining code and simplifies code below. */
11017 if (!TARGET_ALIGN_STRINGOPS)
11020 if (GET_CODE (count_exp) == CONST_INT)
11022 count = INTVAL (count_exp);
11023 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11026 /* Figure out proper mode for counter. For 32bits it is always SImode,
11027 for 64bits use SImode when possible, otherwise DImode.
11028 Set count to number of bytes copied when known at compile time. */
11029 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11030 || x86_64_zero_extended_value (count_exp))
11031 counter_mode = SImode;
11033 counter_mode = DImode;
11035 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11037 emit_insn (gen_cld ());
11039 /* When optimizing for size emit simple rep ; movsb instruction for
11040 counts not divisible by 4. */
11042 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11044 countreg = ix86_zero_extend_to_Pmode (count_exp);
11045 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11047 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
11048 destreg, countreg));
11050 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
11051 destreg, countreg));
11053 else if (count != 0
11055 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11056 || optimize_size || count < (unsigned int) 64))
11058 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11059 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11060 if (count & ~(size - 1))
11062 countreg = copy_to_mode_reg (counter_mode,
11063 GEN_INT ((count >> (size == 4 ? 2 : 3))
11064 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11065 countreg = ix86_zero_extend_to_Pmode (countreg);
11069 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
11070 destreg, countreg));
11072 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
11073 destreg, countreg));
11076 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
11077 destreg, countreg));
11079 if (size == 8 && (count & 0x04))
11080 emit_insn (gen_strsetsi (destreg,
11081 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11083 emit_insn (gen_strsethi (destreg,
11084 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11086 emit_insn (gen_strsetqi (destreg,
11087 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11093 /* Compute desired alignment of the string operation. */
11094 int desired_alignment = (TARGET_PENTIUMPRO
11095 && (count == 0 || count >= (unsigned int) 260)
11096 ? 8 : UNITS_PER_WORD);
11098 /* In case we don't know anything about the alignment, default to
11099 library version, since it is usually equally fast and result in
11102 Also emit call when we know that the count is large and call overhead
11103 will not be important. */
11104 if (!TARGET_INLINE_ALL_STRINGOPS
11105 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11108 if (TARGET_SINGLE_STRINGOP)
11109 emit_insn (gen_cld ());
11111 countreg2 = gen_reg_rtx (Pmode);
11112 countreg = copy_to_mode_reg (counter_mode, count_exp);
11113 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11115 if (count == 0 && align < desired_alignment)
11117 label = gen_label_rtx ();
11118 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11119 LEU, 0, counter_mode, 1, label);
11123 rtx label = ix86_expand_aligntest (destreg, 1);
11124 emit_insn (gen_strsetqi (destreg,
11125 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11126 ix86_adjust_counter (countreg, 1);
11127 emit_label (label);
11128 LABEL_NUSES (label) = 1;
11132 rtx label = ix86_expand_aligntest (destreg, 2);
11133 emit_insn (gen_strsethi (destreg,
11134 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11135 ix86_adjust_counter (countreg, 2);
11136 emit_label (label);
11137 LABEL_NUSES (label) = 1;
11139 if (align <= 4 && desired_alignment > 4)
11141 rtx label = ix86_expand_aligntest (destreg, 4);
11142 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
11143 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11145 ix86_adjust_counter (countreg, 4);
11146 emit_label (label);
11147 LABEL_NUSES (label) = 1;
11150 if (label && desired_alignment > 4 && !TARGET_64BIT)
11152 emit_label (label);
11153 LABEL_NUSES (label) = 1;
11157 if (!TARGET_SINGLE_STRINGOP)
11158 emit_insn (gen_cld ());
11161 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11163 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
11164 destreg, countreg2));
11168 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
11169 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
11170 destreg, countreg2));
11174 emit_label (label);
11175 LABEL_NUSES (label) = 1;
11178 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11179 emit_insn (gen_strsetsi (destreg,
11180 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11181 if (TARGET_64BIT && (align <= 4 || count == 0))
11183 rtx label = ix86_expand_aligntest (countreg, 4);
11184 emit_insn (gen_strsetsi (destreg,
11185 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11186 emit_label (label);
11187 LABEL_NUSES (label) = 1;
11189 if (align > 2 && count != 0 && (count & 2))
11190 emit_insn (gen_strsethi (destreg,
11191 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11192 if (align <= 2 || count == 0)
11194 rtx label = ix86_expand_aligntest (countreg, 2);
11195 emit_insn (gen_strsethi (destreg,
11196 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11197 emit_label (label);
11198 LABEL_NUSES (label) = 1;
11200 if (align > 1 && count != 0 && (count & 1))
11201 emit_insn (gen_strsetqi (destreg,
11202 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11203 if (align <= 1 || count == 0)
11205 rtx label = ix86_expand_aligntest (countreg, 1);
11206 emit_insn (gen_strsetqi (destreg,
11207 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11208 emit_label (label);
11209 LABEL_NUSES (label) = 1;
11214 /* Expand strlen. */
11216 ix86_expand_strlen (out, src, eoschar, align)
11217 rtx out, src, eoschar, align;
11219 rtx addr, scratch1, scratch2, scratch3, scratch4;
11221 /* The generic case of strlen expander is long. Avoid it's
11222 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
11224 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11225 && !TARGET_INLINE_ALL_STRINGOPS
11227 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11230 addr = force_reg (Pmode, XEXP (src, 0));
11231 scratch1 = gen_reg_rtx (Pmode);
11233 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11236 /* Well it seems that some optimizer does not combine a call like
11237 foo(strlen(bar), strlen(bar));
11238 when the move and the subtraction is done here. It does calculate
11239 the length just once when these instructions are done inside of
11240 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
11241 often used and I use one fewer register for the lifetime of
11242 output_strlen_unroll() this is better. */
11244 emit_move_insn (out, addr);
11246 ix86_expand_strlensi_unroll_1 (out, align);
11248 /* strlensi_unroll_1 returns the address of the zero at the end of
11249 the string, like memchr(), so compute the length by subtracting
11250 the start address. */
11252 emit_insn (gen_subdi3 (out, out, addr));
11254 emit_insn (gen_subsi3 (out, out, addr));
11258 scratch2 = gen_reg_rtx (Pmode);
11259 scratch3 = gen_reg_rtx (Pmode);
11260 scratch4 = force_reg (Pmode, constm1_rtx);
11262 emit_move_insn (scratch3, addr);
11263 eoschar = force_reg (QImode, eoschar);
11265 emit_insn (gen_cld ());
11268 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
11269 align, scratch4, scratch3));
11270 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11271 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11275 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
11276 align, scratch4, scratch3));
11277 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11278 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11284 /* Expand the appropriate insns for doing strlen if not just doing
11287 out = result, initialized with the start address
11288 align_rtx = alignment of the address.
11289 scratch = scratch register, initialized with the startaddress when
11290 not aligned, otherwise undefined
11292 This is just the body. It needs the initialisations mentioned above and
11293 some address computing at the end. These things are done in i386.md. */
11296 ix86_expand_strlensi_unroll_1 (out, align_rtx)
11297 rtx out, align_rtx;
11301 rtx align_2_label = NULL_RTX;
11302 rtx align_3_label = NULL_RTX;
11303 rtx align_4_label = gen_label_rtx ();
11304 rtx end_0_label = gen_label_rtx ();
11306 rtx tmpreg = gen_reg_rtx (SImode);
11307 rtx scratch = gen_reg_rtx (SImode);
11311 if (GET_CODE (align_rtx) == CONST_INT)
11312 align = INTVAL (align_rtx);
11314 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11316 /* Is there a known alignment and is it less than 4? */
11319 rtx scratch1 = gen_reg_rtx (Pmode);
11320 emit_move_insn (scratch1, out);
11321 /* Is there a known alignment and is it not 2? */
11324 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11325 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11327 /* Leave just the 3 lower bits. */
11328 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11329 NULL_RTX, 0, OPTAB_WIDEN);
11331 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11332 Pmode, 1, align_4_label);
11333 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
11334 Pmode, 1, align_2_label);
11335 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
11336 Pmode, 1, align_3_label);
11340 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11341 check if is aligned to 4 - byte. */
11343 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
11344 NULL_RTX, 0, OPTAB_WIDEN);
11346 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11347 Pmode, 1, align_4_label);
11350 mem = gen_rtx_MEM (QImode, out);
11352 /* Now compare the bytes. */
11354 /* Compare the first n unaligned byte on a byte per byte basis. */
11355 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11356 QImode, 1, end_0_label);
11358 /* Increment the address. */
11360 emit_insn (gen_adddi3 (out, out, const1_rtx));
11362 emit_insn (gen_addsi3 (out, out, const1_rtx));
11364 /* Not needed with an alignment of 2 */
11367 emit_label (align_2_label);
11369 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11373 emit_insn (gen_adddi3 (out, out, const1_rtx));
11375 emit_insn (gen_addsi3 (out, out, const1_rtx));
11377 emit_label (align_3_label);
11380 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11384 emit_insn (gen_adddi3 (out, out, const1_rtx));
11386 emit_insn (gen_addsi3 (out, out, const1_rtx));
11389 /* Generate loop to check 4 bytes at a time. It is not a good idea to
11390 align this loop. It gives only huge programs, but does not help to
11392 emit_label (align_4_label);
11394 mem = gen_rtx_MEM (SImode, out);
11395 emit_move_insn (scratch, mem);
11397 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11399 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11401 /* This formula yields a nonzero result iff one of the bytes is zero.
11402 This saves three branches inside loop and many cycles. */
11404 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11405 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11406 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11407 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11408 gen_int_mode (0x80808080, SImode)));
11409 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11414 rtx reg = gen_reg_rtx (SImode);
11415 rtx reg2 = gen_reg_rtx (Pmode);
11416 emit_move_insn (reg, tmpreg);
11417 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11419 /* If zero is not in the first two bytes, move two bytes forward. */
11420 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11421 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11422 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11423 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11424 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11427 /* Emit lea manually to avoid clobbering of flags. */
11428 emit_insn (gen_rtx_SET (SImode, reg2,
11429 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
11431 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11432 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11433 emit_insn (gen_rtx_SET (VOIDmode, out,
11434 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11441 rtx end_2_label = gen_label_rtx ();
11442 /* Is zero in the first two bytes? */
11444 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11445 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11446 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11447 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11448 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11450 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11451 JUMP_LABEL (tmp) = end_2_label;
11453 /* Not in the first two. Move two bytes forward. */
11454 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11456 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
11458 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
11460 emit_label (end_2_label);
11464 /* Avoid branch in fixing the byte. */
11465 tmpreg = gen_lowpart (QImode, tmpreg);
11466 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11467 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11469 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11471 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
11473 emit_label (end_0_label);
11477 ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop, sibcall)
11478 rtx retval, fnaddr, callarg1, callarg2, pop;
11481 rtx use = NULL, call;
11483 if (pop == const0_rtx)
11485 if (TARGET_64BIT && pop)
11489 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11490 fnaddr = machopic_indirect_call_target (fnaddr);
11492 /* Static functions and indirect calls don't need the pic register. */
11493 if (! TARGET_64BIT && flag_pic
11494 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11495 && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
11496 use_reg (&use, pic_offset_table_rtx);
11498 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11500 rtx al = gen_rtx_REG (QImode, 0);
11501 emit_move_insn (al, callarg2);
11502 use_reg (&use, al);
11504 #endif /* TARGET_MACHO */
11506 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11508 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11509 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11511 if (sibcall && TARGET_64BIT
11512 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11515 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11516 fnaddr = gen_rtx_REG (Pmode, 40);
11517 emit_move_insn (fnaddr, addr);
11518 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11521 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11523 call = gen_rtx_SET (VOIDmode, retval, call);
11526 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11527 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11528 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11531 call = emit_call_insn (call);
11533 CALL_INSN_FUNCTION_USAGE (call) = use;
11537 /* Clear stack slot assignments remembered from previous functions.
11538 This is called from INIT_EXPANDERS once before RTL is emitted for each
11541 static struct machine_function *
11542 ix86_init_machine_status ()
11544 return ggc_alloc_cleared (sizeof (struct machine_function));
11547 /* Return a MEM corresponding to a stack slot with mode MODE.
11548 Allocate a new slot if necessary.
11550 The RTL for a function can have several slots available: N is
11551 which slot to use. */
11554 assign_386_stack_local (mode, n)
11555 enum machine_mode mode;
11558 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11561 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
11562 ix86_stack_locals[(int) mode][n]
11563 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11565 return ix86_stack_locals[(int) mode][n];
11568 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11570 static GTY(()) rtx ix86_tls_symbol;
11572 ix86_tls_get_addr ()
11575 if (!ix86_tls_symbol)
11577 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11578 (TARGET_GNU_TLS && !TARGET_64BIT)
11579 ? "___tls_get_addr"
11580 : "__tls_get_addr");
11583 return ix86_tls_symbol;
11586 /* Calculate the length of the memory address in the instruction
11587 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11590 memory_address_length (addr)
11593 struct ix86_address parts;
11594 rtx base, index, disp;
11597 if (GET_CODE (addr) == PRE_DEC
11598 || GET_CODE (addr) == POST_INC
11599 || GET_CODE (addr) == PRE_MODIFY
11600 || GET_CODE (addr) == POST_MODIFY)
11603 if (! ix86_decompose_address (addr, &parts))
11607 index = parts.index;
11611 /* Register Indirect. */
11612 if (base && !index && !disp)
11614 /* Special cases: ebp and esp need the two-byte modrm form. */
11615 if (addr == stack_pointer_rtx
11616 || addr == arg_pointer_rtx
11617 || addr == frame_pointer_rtx
11618 || addr == hard_frame_pointer_rtx)
11622 /* Direct Addressing. */
11623 else if (disp && !base && !index)
11628 /* Find the length of the displacement constant. */
11631 if (GET_CODE (disp) == CONST_INT
11632 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
11638 /* An index requires the two-byte modrm form. */
11646 /* Compute default value for "length_immediate" attribute. When SHORTFORM
11647 is set, expect that insn have 8bit immediate alternative. */
11649 ix86_attr_length_immediate_default (insn, shortform)
11655 extract_insn_cached (insn);
11656 for (i = recog_data.n_operands - 1; i >= 0; --i)
11657 if (CONSTANT_P (recog_data.operand[i]))
11662 && GET_CODE (recog_data.operand[i]) == CONST_INT
11663 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11667 switch (get_attr_mode (insn))
11678 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
11683 fatal_insn ("unknown insn mode", insn);
11689 /* Compute default value for "length_address" attribute. */
11691 ix86_attr_length_address_default (insn)
11695 extract_insn_cached (insn);
11696 for (i = recog_data.n_operands - 1; i >= 0; --i)
11697 if (GET_CODE (recog_data.operand[i]) == MEM)
11699 return memory_address_length (XEXP (recog_data.operand[i], 0));
11705 /* Return the maximum number of instructions a cpu can issue. */
11712 case PROCESSOR_PENTIUM:
11716 case PROCESSOR_PENTIUMPRO:
11717 case PROCESSOR_PENTIUM4:
11718 case PROCESSOR_ATHLON:
11727 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
11728 by DEP_INSN and nothing set by DEP_INSN. */
11731 ix86_flags_dependant (insn, dep_insn, insn_type)
11732 rtx insn, dep_insn;
11733 enum attr_type insn_type;
11737 /* Simplify the test for uninteresting insns. */
11738 if (insn_type != TYPE_SETCC
11739 && insn_type != TYPE_ICMOV
11740 && insn_type != TYPE_FCMOV
11741 && insn_type != TYPE_IBR)
11744 if ((set = single_set (dep_insn)) != 0)
11746 set = SET_DEST (set);
11749 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
11750 && XVECLEN (PATTERN (dep_insn), 0) == 2
11751 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
11752 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
11754 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11755 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11760 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11763 /* This test is true if the dependent insn reads the flags but
11764 not any other potentially set register. */
11765 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11768 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11774 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11775 address with operands set by DEP_INSN. */
11778 ix86_agi_dependant (insn, dep_insn, insn_type)
11779 rtx insn, dep_insn;
11780 enum attr_type insn_type;
11784 if (insn_type == TYPE_LEA
11787 addr = PATTERN (insn);
11788 if (GET_CODE (addr) == SET)
11790 else if (GET_CODE (addr) == PARALLEL
11791 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11792 addr = XVECEXP (addr, 0, 0);
11795 addr = SET_SRC (addr);
11800 extract_insn_cached (insn);
11801 for (i = recog_data.n_operands - 1; i >= 0; --i)
11802 if (GET_CODE (recog_data.operand[i]) == MEM)
11804 addr = XEXP (recog_data.operand[i], 0);
11811 return modified_in_p (addr, dep_insn);
11815 ix86_adjust_cost (insn, link, dep_insn, cost)
11816 rtx insn, link, dep_insn;
11819 enum attr_type insn_type, dep_insn_type;
11820 enum attr_memory memory, dep_memory;
11822 int dep_insn_code_number;
11824 /* Anti and output dependencies have zero cost on all CPUs. */
11825 if (REG_NOTE_KIND (link) != 0)
11828 dep_insn_code_number = recog_memoized (dep_insn);
11830 /* If we can't recognize the insns, we can't really do anything. */
11831 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
11834 insn_type = get_attr_type (insn);
11835 dep_insn_type = get_attr_type (dep_insn);
11839 case PROCESSOR_PENTIUM:
11840 /* Address Generation Interlock adds a cycle of latency. */
11841 if (ix86_agi_dependant (insn, dep_insn, insn_type))
11844 /* ??? Compares pair with jump/setcc. */
11845 if (ix86_flags_dependant (insn, dep_insn, insn_type))
11848 /* Floating point stores require value to be ready one cycle earlier. */
11849 if (insn_type == TYPE_FMOV
11850 && get_attr_memory (insn) == MEMORY_STORE
11851 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11855 case PROCESSOR_PENTIUMPRO:
11856 memory = get_attr_memory (insn);
11857 dep_memory = get_attr_memory (dep_insn);
11859 /* Since we can't represent delayed latencies of load+operation,
11860 increase the cost here for non-imov insns. */
11861 if (dep_insn_type != TYPE_IMOV
11862 && dep_insn_type != TYPE_FMOV
11863 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
11866 /* INT->FP conversion is expensive. */
11867 if (get_attr_fp_int_src (dep_insn))
11870 /* There is one cycle extra latency between an FP op and a store. */
11871 if (insn_type == TYPE_FMOV
11872 && (set = single_set (dep_insn)) != NULL_RTX
11873 && (set2 = single_set (insn)) != NULL_RTX
11874 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11875 && GET_CODE (SET_DEST (set2)) == MEM)
11878 /* Show ability of reorder buffer to hide latency of load by executing
11879 in parallel with previous instruction in case
11880 previous instruction is not needed to compute the address. */
11881 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11882 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11884 /* Claim moves to take one cycle, as core can issue one load
11885 at time and the next load can start cycle later. */
11886 if (dep_insn_type == TYPE_IMOV
11887 || dep_insn_type == TYPE_FMOV)
11895 memory = get_attr_memory (insn);
11896 dep_memory = get_attr_memory (dep_insn);
11897 /* The esp dependency is resolved before the instruction is really
11899 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11900 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11903 /* Since we can't represent delayed latencies of load+operation,
11904 increase the cost here for non-imov insns. */
11905 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11906 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
11908 /* INT->FP conversion is expensive. */
11909 if (get_attr_fp_int_src (dep_insn))
11912 /* Show ability of reorder buffer to hide latency of load by executing
11913 in parallel with previous instruction in case
11914 previous instruction is not needed to compute the address. */
11915 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11916 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11918 /* Claim moves to take one cycle, as core can issue one load
11919 at time and the next load can start cycle later. */
11920 if (dep_insn_type == TYPE_IMOV
11921 || dep_insn_type == TYPE_FMOV)
11930 case PROCESSOR_ATHLON:
11932 memory = get_attr_memory (insn);
11933 dep_memory = get_attr_memory (dep_insn);
11935 /* Show ability of reorder buffer to hide latency of load by executing
11936 in parallel with previous instruction in case
11937 previous instruction is not needed to compute the address. */
11938 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11939 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11941 /* Claim moves to take one cycle, as core can issue one load
11942 at time and the next load can start cycle later. */
11943 if (dep_insn_type == TYPE_IMOV
11944 || dep_insn_type == TYPE_FMOV)
11946 else if (cost >= 3)
11961 struct ppro_sched_data
11964 int issued_this_cycle;
11968 static enum attr_ppro_uops
11969 ix86_safe_ppro_uops (insn)
11972 if (recog_memoized (insn) >= 0)
11973 return get_attr_ppro_uops (insn);
11975 return PPRO_UOPS_MANY;
11979 ix86_dump_ppro_packet (dump)
11982 if (ix86_sched_data.ppro.decode[0])
11984 fprintf (dump, "PPRO packet: %d",
11985 INSN_UID (ix86_sched_data.ppro.decode[0]));
11986 if (ix86_sched_data.ppro.decode[1])
11987 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
11988 if (ix86_sched_data.ppro.decode[2])
11989 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
11990 fputc ('\n', dump);
11994 /* We're beginning a new block. Initialize data structures as necessary. */
11997 ix86_sched_init (dump, sched_verbose, veclen)
11998 FILE *dump ATTRIBUTE_UNUSED;
11999 int sched_verbose ATTRIBUTE_UNUSED;
12000 int veclen ATTRIBUTE_UNUSED;
12002 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
12005 /* Shift INSN to SLOT, and shift everything else down. */
12008 ix86_reorder_insn (insnp, slot)
12015 insnp[0] = insnp[1];
12016 while (++insnp != slot);
12022 ix86_sched_reorder_ppro (ready, e_ready)
12027 enum attr_ppro_uops cur_uops;
12028 int issued_this_cycle;
12032 /* At this point .ppro.decode contains the state of the three
12033 decoders from last "cycle". That is, those insns that were
12034 actually independent. But here we're scheduling for the
12035 decoder, and we may find things that are decodable in the
12038 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
12039 issued_this_cycle = 0;
12042 cur_uops = ix86_safe_ppro_uops (*insnp);
12044 /* If the decoders are empty, and we've a complex insn at the
12045 head of the priority queue, let it issue without complaint. */
12046 if (decode[0] == NULL)
12048 if (cur_uops == PPRO_UOPS_MANY)
12050 decode[0] = *insnp;
12054 /* Otherwise, search for a 2-4 uop unsn to issue. */
12055 while (cur_uops != PPRO_UOPS_FEW)
12057 if (insnp == ready)
12059 cur_uops = ix86_safe_ppro_uops (*--insnp);
12062 /* If so, move it to the head of the line. */
12063 if (cur_uops == PPRO_UOPS_FEW)
12064 ix86_reorder_insn (insnp, e_ready);
12066 /* Issue the head of the queue. */
12067 issued_this_cycle = 1;
12068 decode[0] = *e_ready--;
12071 /* Look for simple insns to fill in the other two slots. */
12072 for (i = 1; i < 3; ++i)
12073 if (decode[i] == NULL)
12075 if (ready > e_ready)
12079 cur_uops = ix86_safe_ppro_uops (*insnp);
12080 while (cur_uops != PPRO_UOPS_ONE)
12082 if (insnp == ready)
12084 cur_uops = ix86_safe_ppro_uops (*--insnp);
12087 /* Found one. Move it to the head of the queue and issue it. */
12088 if (cur_uops == PPRO_UOPS_ONE)
12090 ix86_reorder_insn (insnp, e_ready);
12091 decode[i] = *e_ready--;
12092 issued_this_cycle++;
12096 /* ??? Didn't find one. Ideally, here we would do a lazy split
12097 of 2-uop insns, issue one and queue the other. */
12101 if (issued_this_cycle == 0)
12102 issued_this_cycle = 1;
12103 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
12106 /* We are about to being issuing insns for this clock cycle.
12107 Override the default sort algorithm to better slot instructions. */
12109 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
12110 FILE *dump ATTRIBUTE_UNUSED;
12111 int sched_verbose ATTRIBUTE_UNUSED;
12114 int clock_var ATTRIBUTE_UNUSED;
12116 int n_ready = *n_readyp;
12117 rtx *e_ready = ready + n_ready - 1;
12119 /* Make sure to go ahead and initialize key items in
12120 ix86_sched_data if we are not going to bother trying to
12121 reorder the ready queue. */
12124 ix86_sched_data.ppro.issued_this_cycle = 1;
12133 case PROCESSOR_PENTIUMPRO:
12134 ix86_sched_reorder_ppro (ready, e_ready);
12139 return ix86_issue_rate ();
12142 /* We are about to issue INSN. Return the number of insns left on the
12143 ready queue that can be issued this cycle. */
12146 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
12150 int can_issue_more;
12156 return can_issue_more - 1;
12158 case PROCESSOR_PENTIUMPRO:
12160 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
12162 if (uops == PPRO_UOPS_MANY)
12165 ix86_dump_ppro_packet (dump);
12166 ix86_sched_data.ppro.decode[0] = insn;
12167 ix86_sched_data.ppro.decode[1] = NULL;
12168 ix86_sched_data.ppro.decode[2] = NULL;
12170 ix86_dump_ppro_packet (dump);
12171 ix86_sched_data.ppro.decode[0] = NULL;
12173 else if (uops == PPRO_UOPS_FEW)
12176 ix86_dump_ppro_packet (dump);
12177 ix86_sched_data.ppro.decode[0] = insn;
12178 ix86_sched_data.ppro.decode[1] = NULL;
12179 ix86_sched_data.ppro.decode[2] = NULL;
12183 for (i = 0; i < 3; ++i)
12184 if (ix86_sched_data.ppro.decode[i] == NULL)
12186 ix86_sched_data.ppro.decode[i] = insn;
12194 ix86_dump_ppro_packet (dump);
12195 ix86_sched_data.ppro.decode[0] = NULL;
12196 ix86_sched_data.ppro.decode[1] = NULL;
12197 ix86_sched_data.ppro.decode[2] = NULL;
12201 return --ix86_sched_data.ppro.issued_this_cycle;
12206 ia32_use_dfa_pipeline_interface ()
12208 if (TARGET_PENTIUM || TARGET_ATHLON_K8)
12213 /* How many alternative schedules to try. This should be as wide as the
12214 scheduling freedom in the DFA, but no wider. Making this value too
12215 large results extra work for the scheduler. */
12218 ia32_multipass_dfa_lookahead ()
12220 if (ix86_cpu == PROCESSOR_PENTIUM)
12227 /* Walk through INSNS and look for MEM references whose address is DSTREG or
12228 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
12232 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
12234 rtx dstref, srcref, dstreg, srcreg;
12238 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
12240 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
12244 /* Subroutine of above to actually do the updating by recursively walking
12248 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
12250 rtx dstref, srcref, dstreg, srcreg;
12252 enum rtx_code code = GET_CODE (x);
12253 const char *format_ptr = GET_RTX_FORMAT (code);
12256 if (code == MEM && XEXP (x, 0) == dstreg)
12257 MEM_COPY_ATTRIBUTES (x, dstref);
12258 else if (code == MEM && XEXP (x, 0) == srcreg)
12259 MEM_COPY_ATTRIBUTES (x, srcref);
12261 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
12263 if (*format_ptr == 'e')
12264 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
12266 else if (*format_ptr == 'E')
12267 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12268 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
12273 /* Compute the alignment given to a constant that is being placed in memory.
12274 EXP is the constant and ALIGN is the alignment that the object would
12276 The value of this function is used instead of that alignment to align
12280 ix86_constant_alignment (exp, align)
12284 if (TREE_CODE (exp) == REAL_CST)
12286 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12288 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12291 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
12298 /* Compute the alignment for a static variable.
12299 TYPE is the data type, and ALIGN is the alignment that
12300 the object would ordinarily have. The value of this function is used
12301 instead of that alignment to align the object. */
12304 ix86_data_alignment (type, align)
12308 if (AGGREGATE_TYPE_P (type)
12309 && TYPE_SIZE (type)
12310 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12311 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12312 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12315 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12316 to 16byte boundary. */
12319 if (AGGREGATE_TYPE_P (type)
12320 && TYPE_SIZE (type)
12321 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12322 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12323 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12327 if (TREE_CODE (type) == ARRAY_TYPE)
12329 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12331 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12334 else if (TREE_CODE (type) == COMPLEX_TYPE)
12337 if (TYPE_MODE (type) == DCmode && align < 64)
12339 if (TYPE_MODE (type) == XCmode && align < 128)
12342 else if ((TREE_CODE (type) == RECORD_TYPE
12343 || TREE_CODE (type) == UNION_TYPE
12344 || TREE_CODE (type) == QUAL_UNION_TYPE)
12345 && TYPE_FIELDS (type))
12347 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12349 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12352 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12353 || TREE_CODE (type) == INTEGER_TYPE)
12355 if (TYPE_MODE (type) == DFmode && align < 64)
12357 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12364 /* Compute the alignment for a local variable.
12365 TYPE is the data type, and ALIGN is the alignment that
12366 the object would ordinarily have. The value of this macro is used
12367 instead of that alignment to align the object. */
12370 ix86_local_alignment (type, align)
12374 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12375 to 16byte boundary. */
12378 if (AGGREGATE_TYPE_P (type)
12379 && TYPE_SIZE (type)
12380 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12381 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12382 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12385 if (TREE_CODE (type) == ARRAY_TYPE)
12387 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12389 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12392 else if (TREE_CODE (type) == COMPLEX_TYPE)
12394 if (TYPE_MODE (type) == DCmode && align < 64)
12396 if (TYPE_MODE (type) == XCmode && align < 128)
12399 else if ((TREE_CODE (type) == RECORD_TYPE
12400 || TREE_CODE (type) == UNION_TYPE
12401 || TREE_CODE (type) == QUAL_UNION_TYPE)
12402 && TYPE_FIELDS (type))
12404 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12406 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12409 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12410 || TREE_CODE (type) == INTEGER_TYPE)
12413 if (TYPE_MODE (type) == DFmode && align < 64)
12415 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12421 /* Emit RTL insns to initialize the variable parts of a trampoline.
12422 FNADDR is an RTX for the address of the function's pure code.
12423 CXT is an RTX for the static chain value for the function. */
12425 x86_initialize_trampoline (tramp, fnaddr, cxt)
12426 rtx tramp, fnaddr, cxt;
12430 /* Compute offset from the end of the jmp to the target function. */
12431 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12432 plus_constant (tramp, 10),
12433 NULL_RTX, 1, OPTAB_DIRECT);
12434 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12435 gen_int_mode (0xb9, QImode));
12436 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12437 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12438 gen_int_mode (0xe9, QImode));
12439 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12444 /* Try to load address using shorter movl instead of movabs.
12445 We may want to support movq for kernel mode, but kernel does not use
12446 trampolines at the moment. */
12447 if (x86_64_zero_extended_value (fnaddr))
12449 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12450 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12451 gen_int_mode (0xbb41, HImode));
12452 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12453 gen_lowpart (SImode, fnaddr));
12458 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12459 gen_int_mode (0xbb49, HImode));
12460 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12464 /* Load static chain using movabs to r10. */
12465 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12466 gen_int_mode (0xba49, HImode));
12467 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12470 /* Jump to the r11 */
12471 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12472 gen_int_mode (0xff49, HImode));
12473 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12474 gen_int_mode (0xe3, QImode));
12476 if (offset > TRAMPOLINE_SIZE)
12480 #ifdef TRANSFER_FROM_TRAMPOLINE
12481 emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
12482 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12486 #define def_builtin(MASK, NAME, TYPE, CODE) \
12488 if ((MASK) & target_flags) \
12489 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12490 NULL, NULL_TREE); \
12493 struct builtin_description
12495 const unsigned int mask;
12496 const enum insn_code icode;
12497 const char *const name;
12498 const enum ix86_builtins code;
12499 const enum rtx_code comparison;
12500 const unsigned int flag;
12503 /* Used for builtins that are enabled both by -msse and -msse2. */
12504 #define MASK_SSE1 (MASK_SSE | MASK_SSE2)
12506 static const struct builtin_description bdesc_comi[] =
12508 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12509 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12510 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12511 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12512 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12513 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12514 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12515 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12516 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12517 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12518 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12519 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12520 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12521 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12522 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12523 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12524 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12525 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12526 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12527 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12528 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12529 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12530 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12531 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12534 static const struct builtin_description bdesc_2arg[] =
12537 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12538 { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12539 { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12540 { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12541 { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12542 { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12543 { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12544 { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12546 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12547 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12548 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12549 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12550 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12551 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12552 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12553 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12554 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12555 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12556 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12557 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12558 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12559 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12560 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12561 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12562 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12563 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12564 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12565 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12567 { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12568 { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12569 { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12570 { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12572 { MASK_SSE1, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12573 { MASK_SSE1, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12574 { MASK_SSE1, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12575 { MASK_SSE1, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12577 { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12578 { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12579 { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12580 { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12581 { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12584 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12585 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12586 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12587 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12588 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12589 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12591 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12592 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12593 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12594 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12595 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12596 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12597 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12598 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12600 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12601 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12602 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12604 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12605 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12606 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12607 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12609 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12610 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12612 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12613 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12614 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12615 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12616 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12617 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12619 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12620 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12621 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12622 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12624 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12625 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12626 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12627 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12628 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12629 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12632 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12633 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12634 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12636 { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12637 { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12639 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12640 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12641 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12642 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12643 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12644 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12646 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12647 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12648 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12649 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12650 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12651 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12653 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12654 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12655 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12656 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12658 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12659 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12662 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12663 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12664 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12665 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12666 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12667 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12668 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12669 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12671 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12672 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12673 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12674 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12675 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12676 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12677 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12678 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12679 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12680 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12681 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12682 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12683 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12684 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12685 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12686 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12687 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12688 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12689 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12690 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12692 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12693 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12694 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12695 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12697 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12698 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12699 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12700 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12702 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12703 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12704 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12707 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12708 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12709 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12710 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12711 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12712 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12713 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12714 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12716 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12717 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12718 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12719 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12720 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12721 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12722 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12723 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12725 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12726 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12727 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
12728 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
12730 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12731 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12732 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12733 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
12735 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12736 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12738 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12739 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12740 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12741 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12742 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12743 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12745 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12746 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12747 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12748 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12750 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12751 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12752 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
12753 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
12754 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12755 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12756 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12757 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
12759 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12760 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12761 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12763 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12764 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12766 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12767 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12768 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12769 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12770 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12771 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12773 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12774 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12775 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12776 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12777 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12778 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12780 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12781 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12782 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12783 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12785 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12787 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12788 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12789 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
12792 static const struct builtin_description bdesc_1arg[] =
12794 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12795 { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12797 { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12798 { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12799 { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12801 { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12802 { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12803 { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12804 { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12806 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12807 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12808 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
12809 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
12811 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12813 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12814 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
12816 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12817 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12818 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12819 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12820 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
12822 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
12824 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12825 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12827 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12828 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
12829 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
12831 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 }
12835 ix86_init_builtins ()
12838 ix86_init_mmx_sse_builtins ();
12841 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
12842 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
12845 ix86_init_mmx_sse_builtins ()
12847 const struct builtin_description * d;
12850 tree pchar_type_node = build_pointer_type (char_type_node);
12851 tree pcchar_type_node = build_pointer_type (
12852 build_type_variant (char_type_node, 1, 0));
12853 tree pfloat_type_node = build_pointer_type (float_type_node);
12854 tree pcfloat_type_node = build_pointer_type (
12855 build_type_variant (float_type_node, 1, 0));
12856 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
12857 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
12858 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
12861 tree int_ftype_v4sf_v4sf
12862 = build_function_type_list (integer_type_node,
12863 V4SF_type_node, V4SF_type_node, NULL_TREE);
12864 tree v4si_ftype_v4sf_v4sf
12865 = build_function_type_list (V4SI_type_node,
12866 V4SF_type_node, V4SF_type_node, NULL_TREE);
12867 /* MMX/SSE/integer conversions. */
12868 tree int_ftype_v4sf
12869 = build_function_type_list (integer_type_node,
12870 V4SF_type_node, NULL_TREE);
12871 tree int_ftype_v8qi
12872 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
12873 tree v4sf_ftype_v4sf_int
12874 = build_function_type_list (V4SF_type_node,
12875 V4SF_type_node, integer_type_node, NULL_TREE);
12876 tree v4sf_ftype_v4sf_v2si
12877 = build_function_type_list (V4SF_type_node,
12878 V4SF_type_node, V2SI_type_node, NULL_TREE);
12879 tree int_ftype_v4hi_int
12880 = build_function_type_list (integer_type_node,
12881 V4HI_type_node, integer_type_node, NULL_TREE);
12882 tree v4hi_ftype_v4hi_int_int
12883 = build_function_type_list (V4HI_type_node, V4HI_type_node,
12884 integer_type_node, integer_type_node,
12886 /* Miscellaneous. */
12887 tree v8qi_ftype_v4hi_v4hi
12888 = build_function_type_list (V8QI_type_node,
12889 V4HI_type_node, V4HI_type_node, NULL_TREE);
12890 tree v4hi_ftype_v2si_v2si
12891 = build_function_type_list (V4HI_type_node,
12892 V2SI_type_node, V2SI_type_node, NULL_TREE);
12893 tree v4sf_ftype_v4sf_v4sf_int
12894 = build_function_type_list (V4SF_type_node,
12895 V4SF_type_node, V4SF_type_node,
12896 integer_type_node, NULL_TREE);
12897 tree v2si_ftype_v4hi_v4hi
12898 = build_function_type_list (V2SI_type_node,
12899 V4HI_type_node, V4HI_type_node, NULL_TREE);
12900 tree v4hi_ftype_v4hi_int
12901 = build_function_type_list (V4HI_type_node,
12902 V4HI_type_node, integer_type_node, NULL_TREE);
12903 tree v4hi_ftype_v4hi_di
12904 = build_function_type_list (V4HI_type_node,
12905 V4HI_type_node, long_long_unsigned_type_node,
12907 tree v2si_ftype_v2si_di
12908 = build_function_type_list (V2SI_type_node,
12909 V2SI_type_node, long_long_unsigned_type_node,
12911 tree void_ftype_void
12912 = build_function_type (void_type_node, void_list_node);
12913 tree void_ftype_unsigned
12914 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
12915 tree unsigned_ftype_void
12916 = build_function_type (unsigned_type_node, void_list_node);
12918 = build_function_type (long_long_unsigned_type_node, void_list_node);
12919 tree v4sf_ftype_void
12920 = build_function_type (V4SF_type_node, void_list_node);
12921 tree v2si_ftype_v4sf
12922 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
12923 /* Loads/stores. */
12924 tree void_ftype_v8qi_v8qi_pchar
12925 = build_function_type_list (void_type_node,
12926 V8QI_type_node, V8QI_type_node,
12927 pchar_type_node, NULL_TREE);
12928 tree v4sf_ftype_pcfloat
12929 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
12930 /* @@@ the type is bogus */
12931 tree v4sf_ftype_v4sf_pv2si
12932 = build_function_type_list (V4SF_type_node,
12933 V4SF_type_node, pv2si_type_node, NULL_TREE);
12934 tree void_ftype_pv2si_v4sf
12935 = build_function_type_list (void_type_node,
12936 pv2si_type_node, V4SF_type_node, NULL_TREE);
12937 tree void_ftype_pfloat_v4sf
12938 = build_function_type_list (void_type_node,
12939 pfloat_type_node, V4SF_type_node, NULL_TREE);
12940 tree void_ftype_pdi_di
12941 = build_function_type_list (void_type_node,
12942 pdi_type_node, long_long_unsigned_type_node,
12944 tree void_ftype_pv2di_v2di
12945 = build_function_type_list (void_type_node,
12946 pv2di_type_node, V2DI_type_node, NULL_TREE);
12947 /* Normal vector unops. */
12948 tree v4sf_ftype_v4sf
12949 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
12951 /* Normal vector binops. */
12952 tree v4sf_ftype_v4sf_v4sf
12953 = build_function_type_list (V4SF_type_node,
12954 V4SF_type_node, V4SF_type_node, NULL_TREE);
12955 tree v8qi_ftype_v8qi_v8qi
12956 = build_function_type_list (V8QI_type_node,
12957 V8QI_type_node, V8QI_type_node, NULL_TREE);
12958 tree v4hi_ftype_v4hi_v4hi
12959 = build_function_type_list (V4HI_type_node,
12960 V4HI_type_node, V4HI_type_node, NULL_TREE);
12961 tree v2si_ftype_v2si_v2si
12962 = build_function_type_list (V2SI_type_node,
12963 V2SI_type_node, V2SI_type_node, NULL_TREE);
12964 tree di_ftype_di_di
12965 = build_function_type_list (long_long_unsigned_type_node,
12966 long_long_unsigned_type_node,
12967 long_long_unsigned_type_node, NULL_TREE);
12969 tree v2si_ftype_v2sf
12970 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
12971 tree v2sf_ftype_v2si
12972 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
12973 tree v2si_ftype_v2si
12974 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
12975 tree v2sf_ftype_v2sf
12976 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
12977 tree v2sf_ftype_v2sf_v2sf
12978 = build_function_type_list (V2SF_type_node,
12979 V2SF_type_node, V2SF_type_node, NULL_TREE);
12980 tree v2si_ftype_v2sf_v2sf
12981 = build_function_type_list (V2SI_type_node,
12982 V2SF_type_node, V2SF_type_node, NULL_TREE);
12983 tree pint_type_node = build_pointer_type (integer_type_node);
12984 tree pcint_type_node = build_pointer_type (
12985 build_type_variant (integer_type_node, 1, 0));
12986 tree pdouble_type_node = build_pointer_type (double_type_node);
12987 tree pcdouble_type_node = build_pointer_type (
12988 build_type_variant (double_type_node, 1, 0));
12989 tree int_ftype_v2df_v2df
12990 = build_function_type_list (integer_type_node,
12991 V2DF_type_node, V2DF_type_node, NULL_TREE);
12994 = build_function_type (intTI_type_node, void_list_node);
12995 tree v2di_ftype_void
12996 = build_function_type (V2DI_type_node, void_list_node);
12997 tree ti_ftype_ti_ti
12998 = build_function_type_list (intTI_type_node,
12999 intTI_type_node, intTI_type_node, NULL_TREE);
13000 tree void_ftype_pcvoid
13001 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
13003 = build_function_type_list (V2DI_type_node,
13004 long_long_unsigned_type_node, NULL_TREE);
13006 = build_function_type_list (long_long_unsigned_type_node,
13007 V2DI_type_node, NULL_TREE);
13008 tree v4sf_ftype_v4si
13009 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
13010 tree v4si_ftype_v4sf
13011 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
13012 tree v2df_ftype_v4si
13013 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
13014 tree v4si_ftype_v2df
13015 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
13016 tree v2si_ftype_v2df
13017 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
13018 tree v4sf_ftype_v2df
13019 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
13020 tree v2df_ftype_v2si
13021 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
13022 tree v2df_ftype_v4sf
13023 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
13024 tree int_ftype_v2df
13025 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
13026 tree v2df_ftype_v2df_int
13027 = build_function_type_list (V2DF_type_node,
13028 V2DF_type_node, integer_type_node, NULL_TREE);
13029 tree v4sf_ftype_v4sf_v2df
13030 = build_function_type_list (V4SF_type_node,
13031 V4SF_type_node, V2DF_type_node, NULL_TREE);
13032 tree v2df_ftype_v2df_v4sf
13033 = build_function_type_list (V2DF_type_node,
13034 V2DF_type_node, V4SF_type_node, NULL_TREE);
13035 tree v2df_ftype_v2df_v2df_int
13036 = build_function_type_list (V2DF_type_node,
13037 V2DF_type_node, V2DF_type_node,
13040 tree v2df_ftype_v2df_pv2si
13041 = build_function_type_list (V2DF_type_node,
13042 V2DF_type_node, pv2si_type_node, NULL_TREE);
13043 tree void_ftype_pv2si_v2df
13044 = build_function_type_list (void_type_node,
13045 pv2si_type_node, V2DF_type_node, NULL_TREE);
13046 tree void_ftype_pdouble_v2df
13047 = build_function_type_list (void_type_node,
13048 pdouble_type_node, V2DF_type_node, NULL_TREE);
13049 tree void_ftype_pint_int
13050 = build_function_type_list (void_type_node,
13051 pint_type_node, integer_type_node, NULL_TREE);
13052 tree void_ftype_v16qi_v16qi_pchar
13053 = build_function_type_list (void_type_node,
13054 V16QI_type_node, V16QI_type_node,
13055 pchar_type_node, NULL_TREE);
13056 tree v2df_ftype_pcdouble
13057 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
13058 tree v2df_ftype_v2df_v2df
13059 = build_function_type_list (V2DF_type_node,
13060 V2DF_type_node, V2DF_type_node, NULL_TREE);
13061 tree v16qi_ftype_v16qi_v16qi
13062 = build_function_type_list (V16QI_type_node,
13063 V16QI_type_node, V16QI_type_node, NULL_TREE);
13064 tree v8hi_ftype_v8hi_v8hi
13065 = build_function_type_list (V8HI_type_node,
13066 V8HI_type_node, V8HI_type_node, NULL_TREE);
13067 tree v4si_ftype_v4si_v4si
13068 = build_function_type_list (V4SI_type_node,
13069 V4SI_type_node, V4SI_type_node, NULL_TREE);
13070 tree v2di_ftype_v2di_v2di
13071 = build_function_type_list (V2DI_type_node,
13072 V2DI_type_node, V2DI_type_node, NULL_TREE);
13073 tree v2di_ftype_v2df_v2df
13074 = build_function_type_list (V2DI_type_node,
13075 V2DF_type_node, V2DF_type_node, NULL_TREE);
13076 tree v2df_ftype_v2df
13077 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13078 tree v2df_ftype_double
13079 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
13080 tree v2df_ftype_double_double
13081 = build_function_type_list (V2DF_type_node,
13082 double_type_node, double_type_node, NULL_TREE);
13083 tree int_ftype_v8hi_int
13084 = build_function_type_list (integer_type_node,
13085 V8HI_type_node, integer_type_node, NULL_TREE);
13086 tree v8hi_ftype_v8hi_int_int
13087 = build_function_type_list (V8HI_type_node,
13088 V8HI_type_node, integer_type_node,
13089 integer_type_node, NULL_TREE);
13090 tree v2di_ftype_v2di_int
13091 = build_function_type_list (V2DI_type_node,
13092 V2DI_type_node, integer_type_node, NULL_TREE);
13093 tree v4si_ftype_v4si_int
13094 = build_function_type_list (V4SI_type_node,
13095 V4SI_type_node, integer_type_node, NULL_TREE);
13096 tree v8hi_ftype_v8hi_int
13097 = build_function_type_list (V8HI_type_node,
13098 V8HI_type_node, integer_type_node, NULL_TREE);
13099 tree v8hi_ftype_v8hi_v2di
13100 = build_function_type_list (V8HI_type_node,
13101 V8HI_type_node, V2DI_type_node, NULL_TREE);
13102 tree v4si_ftype_v4si_v2di
13103 = build_function_type_list (V4SI_type_node,
13104 V4SI_type_node, V2DI_type_node, NULL_TREE);
13105 tree v4si_ftype_v8hi_v8hi
13106 = build_function_type_list (V4SI_type_node,
13107 V8HI_type_node, V8HI_type_node, NULL_TREE);
13108 tree di_ftype_v8qi_v8qi
13109 = build_function_type_list (long_long_unsigned_type_node,
13110 V8QI_type_node, V8QI_type_node, NULL_TREE);
13111 tree v2di_ftype_v16qi_v16qi
13112 = build_function_type_list (V2DI_type_node,
13113 V16QI_type_node, V16QI_type_node, NULL_TREE);
13114 tree int_ftype_v16qi
13115 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
13116 tree v16qi_ftype_pcchar
13117 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
13118 tree void_ftype_pchar_v16qi
13119 = build_function_type_list (void_type_node,
13120 pchar_type_node, V16QI_type_node, NULL_TREE);
13121 tree v4si_ftype_pcint
13122 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
13123 tree void_ftype_pcint_v4si
13124 = build_function_type_list (void_type_node,
13125 pcint_type_node, V4SI_type_node, NULL_TREE);
13126 tree v2di_ftype_v2di
13127 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
13129 /* Add all builtins that are more or less simple operations on two
13131 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13133 /* Use one of the operands; the target can have a different mode for
13134 mask-generating compares. */
13135 enum machine_mode mode;
13140 mode = insn_data[d->icode].operand[1].mode;
13145 type = v16qi_ftype_v16qi_v16qi;
13148 type = v8hi_ftype_v8hi_v8hi;
13151 type = v4si_ftype_v4si_v4si;
13154 type = v2di_ftype_v2di_v2di;
13157 type = v2df_ftype_v2df_v2df;
13160 type = ti_ftype_ti_ti;
13163 type = v4sf_ftype_v4sf_v4sf;
13166 type = v8qi_ftype_v8qi_v8qi;
13169 type = v4hi_ftype_v4hi_v4hi;
13172 type = v2si_ftype_v2si_v2si;
13175 type = di_ftype_di_di;
13182 /* Override for comparisons. */
13183 if (d->icode == CODE_FOR_maskcmpv4sf3
13184 || d->icode == CODE_FOR_maskncmpv4sf3
13185 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13186 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
13187 type = v4si_ftype_v4sf_v4sf;
13189 if (d->icode == CODE_FOR_maskcmpv2df3
13190 || d->icode == CODE_FOR_maskncmpv2df3
13191 || d->icode == CODE_FOR_vmmaskcmpv2df3
13192 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13193 type = v2di_ftype_v2df_v2df;
13195 def_builtin (d->mask, d->name, type, d->code);
13198 /* Add the remaining MMX insns with somewhat more complicated types. */
13199 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13200 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13201 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13202 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13203 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13205 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13206 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13207 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13209 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13210 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13212 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13213 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
13215 /* comi/ucomi insns. */
13216 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13217 if (d->mask == MASK_SSE2)
13218 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13220 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
13222 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13223 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13224 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13226 def_builtin (MASK_SSE1, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13227 def_builtin (MASK_SSE1, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13228 def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13229 def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13230 def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13231 def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13232 def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13233 def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13235 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13236 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13238 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13240 def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13241 def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13242 def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13243 def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13244 def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13245 def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13247 def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13248 def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13249 def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13250 def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13252 def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13253 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13254 def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13255 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13257 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13259 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13261 def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13262 def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13263 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13264 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13265 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13266 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13268 def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13270 /* Original 3DNow! */
13271 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13272 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13273 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13274 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13275 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13276 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13277 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13278 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13279 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13280 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13281 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13282 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13283 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13284 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13285 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13286 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13287 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13288 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13289 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13290 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13292 /* 3DNow! extension as used in the Athlon CPU. */
13293 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13294 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13295 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13296 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13297 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13298 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13300 def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13303 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13304 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13306 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13307 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13308 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13310 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13311 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13312 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
13313 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13314 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13315 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13317 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13318 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13319 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13320 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13322 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13323 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13324 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13325 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13326 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13328 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13329 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13330 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13331 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13333 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13334 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13336 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13338 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13339 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13341 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13342 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13343 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13344 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13345 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13347 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13349 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13350 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13352 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13353 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13354 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13356 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13357 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13358 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13360 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13361 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13362 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13363 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13364 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13365 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13366 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13368 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13369 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13370 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13372 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13373 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13374 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13375 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13376 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13377 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13378 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13380 def_builtin (MASK_SSE1, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13382 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13383 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13384 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13386 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13387 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13388 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13390 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13391 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13393 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13394 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13395 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13396 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13398 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13399 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13400 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13401 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13403 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13404 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13406 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13409 /* Errors in the source file can cause expand_expr to return const0_rtx
13410 where we expect a vector. To avoid crashing, use one of the vector
13411 clear instructions. */
13413 safe_vector_operand (x, mode)
13415 enum machine_mode mode;
13417 if (x != const0_rtx)
13419 x = gen_reg_rtx (mode);
13421 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13422 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13423 : gen_rtx_SUBREG (DImode, x, 0)));
13425 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13426 : gen_rtx_SUBREG (V4SFmode, x, 0),
13427 CONST0_RTX (V4SFmode)));
13431 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
13434 ix86_expand_binop_builtin (icode, arglist, target)
13435 enum insn_code icode;
13440 tree arg0 = TREE_VALUE (arglist);
13441 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13442 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13443 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13444 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13445 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13446 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13448 if (VECTOR_MODE_P (mode0))
13449 op0 = safe_vector_operand (op0, mode0);
13450 if (VECTOR_MODE_P (mode1))
13451 op1 = safe_vector_operand (op1, mode1);
13454 || GET_MODE (target) != tmode
13455 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13456 target = gen_reg_rtx (tmode);
13458 if (GET_MODE (op1) == SImode && mode1 == TImode)
13460 rtx x = gen_reg_rtx (V4SImode);
13461 emit_insn (gen_sse2_loadd (x, op1));
13462 op1 = gen_lowpart (TImode, x);
13465 /* In case the insn wants input operands in modes different from
13466 the result, abort. */
13467 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
13470 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13471 op0 = copy_to_mode_reg (mode0, op0);
13472 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13473 op1 = copy_to_mode_reg (mode1, op1);
13475 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13476 yet one of the two must not be a memory. This is normally enforced
13477 by expanders, but we didn't bother to create one here. */
13478 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13479 op0 = copy_to_mode_reg (mode0, op0);
13481 pat = GEN_FCN (icode) (target, op0, op1);
13488 /* Subroutine of ix86_expand_builtin to take care of stores. */
13491 ix86_expand_store_builtin (icode, arglist)
13492 enum insn_code icode;
13496 tree arg0 = TREE_VALUE (arglist);
13497 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13498 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13499 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13500 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13501 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13503 if (VECTOR_MODE_P (mode1))
13504 op1 = safe_vector_operand (op1, mode1);
13506 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13507 op1 = copy_to_mode_reg (mode1, op1);
13509 pat = GEN_FCN (icode) (op0, op1);
13515 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
13518 ix86_expand_unop_builtin (icode, arglist, target, do_load)
13519 enum insn_code icode;
13525 tree arg0 = TREE_VALUE (arglist);
13526 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13527 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13528 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13531 || GET_MODE (target) != tmode
13532 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13533 target = gen_reg_rtx (tmode);
13535 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13538 if (VECTOR_MODE_P (mode0))
13539 op0 = safe_vector_operand (op0, mode0);
13541 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13542 op0 = copy_to_mode_reg (mode0, op0);
13545 pat = GEN_FCN (icode) (target, op0);
13552 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13553 sqrtss, rsqrtss, rcpss. */
13556 ix86_expand_unop1_builtin (icode, arglist, target)
13557 enum insn_code icode;
13562 tree arg0 = TREE_VALUE (arglist);
13563 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13564 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13565 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13568 || GET_MODE (target) != tmode
13569 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13570 target = gen_reg_rtx (tmode);
13572 if (VECTOR_MODE_P (mode0))
13573 op0 = safe_vector_operand (op0, mode0);
13575 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13576 op0 = copy_to_mode_reg (mode0, op0);
13579 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13580 op1 = copy_to_mode_reg (mode0, op1);
13582 pat = GEN_FCN (icode) (target, op0, op1);
13589 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13592 ix86_expand_sse_compare (d, arglist, target)
13593 const struct builtin_description *d;
13598 tree arg0 = TREE_VALUE (arglist);
13599 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13600 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13601 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13603 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13604 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13605 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13606 enum rtx_code comparison = d->comparison;
13608 if (VECTOR_MODE_P (mode0))
13609 op0 = safe_vector_operand (op0, mode0);
13610 if (VECTOR_MODE_P (mode1))
13611 op1 = safe_vector_operand (op1, mode1);
13613 /* Swap operands if we have a comparison that isn't available in
13617 rtx tmp = gen_reg_rtx (mode1);
13618 emit_move_insn (tmp, op1);
13624 || GET_MODE (target) != tmode
13625 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13626 target = gen_reg_rtx (tmode);
13628 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13629 op0 = copy_to_mode_reg (mode0, op0);
13630 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13631 op1 = copy_to_mode_reg (mode1, op1);
13633 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13634 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13641 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
13644 ix86_expand_sse_comi (d, arglist, target)
13645 const struct builtin_description *d;
13650 tree arg0 = TREE_VALUE (arglist);
13651 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13652 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13653 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13655 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13656 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13657 enum rtx_code comparison = d->comparison;
13659 if (VECTOR_MODE_P (mode0))
13660 op0 = safe_vector_operand (op0, mode0);
13661 if (VECTOR_MODE_P (mode1))
13662 op1 = safe_vector_operand (op1, mode1);
13664 /* Swap operands if we have a comparison that isn't available in
13673 target = gen_reg_rtx (SImode);
13674 emit_move_insn (target, const0_rtx);
13675 target = gen_rtx_SUBREG (QImode, target, 0);
13677 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13678 op0 = copy_to_mode_reg (mode0, op0);
13679 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13680 op1 = copy_to_mode_reg (mode1, op1);
13682 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13683 pat = GEN_FCN (d->icode) (op0, op1);
13687 emit_insn (gen_rtx_SET (VOIDmode,
13688 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13689 gen_rtx_fmt_ee (comparison, QImode,
13693 return SUBREG_REG (target);
13696 /* Expand an expression EXP that calls a built-in function,
13697 with result going to TARGET if that's convenient
13698 (and in mode MODE if that's convenient).
13699 SUBTARGET may be used as the target for computing one of EXP's operands.
13700 IGNORE is nonzero if the value is to be ignored. */
13703 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
13706 rtx subtarget ATTRIBUTE_UNUSED;
13707 enum machine_mode mode ATTRIBUTE_UNUSED;
13708 int ignore ATTRIBUTE_UNUSED;
13710 const struct builtin_description *d;
13712 enum insn_code icode;
13713 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13714 tree arglist = TREE_OPERAND (exp, 1);
13715 tree arg0, arg1, arg2;
13716 rtx op0, op1, op2, pat;
13717 enum machine_mode tmode, mode0, mode1, mode2;
13718 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13722 case IX86_BUILTIN_EMMS:
13723 emit_insn (gen_emms ());
13726 case IX86_BUILTIN_SFENCE:
13727 emit_insn (gen_sfence ());
13730 case IX86_BUILTIN_PEXTRW:
13731 case IX86_BUILTIN_PEXTRW128:
13732 icode = (fcode == IX86_BUILTIN_PEXTRW
13733 ? CODE_FOR_mmx_pextrw
13734 : CODE_FOR_sse2_pextrw);
13735 arg0 = TREE_VALUE (arglist);
13736 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13737 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13738 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13739 tmode = insn_data[icode].operand[0].mode;
13740 mode0 = insn_data[icode].operand[1].mode;
13741 mode1 = insn_data[icode].operand[2].mode;
13743 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13744 op0 = copy_to_mode_reg (mode0, op0);
13745 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13747 /* @@@ better error message */
13748 error ("selector must be an immediate");
13749 return gen_reg_rtx (tmode);
13752 || GET_MODE (target) != tmode
13753 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13754 target = gen_reg_rtx (tmode);
13755 pat = GEN_FCN (icode) (target, op0, op1);
13761 case IX86_BUILTIN_PINSRW:
13762 case IX86_BUILTIN_PINSRW128:
13763 icode = (fcode == IX86_BUILTIN_PINSRW
13764 ? CODE_FOR_mmx_pinsrw
13765 : CODE_FOR_sse2_pinsrw);
13766 arg0 = TREE_VALUE (arglist);
13767 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13768 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13769 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13770 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13771 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13772 tmode = insn_data[icode].operand[0].mode;
13773 mode0 = insn_data[icode].operand[1].mode;
13774 mode1 = insn_data[icode].operand[2].mode;
13775 mode2 = insn_data[icode].operand[3].mode;
13777 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13778 op0 = copy_to_mode_reg (mode0, op0);
13779 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13780 op1 = copy_to_mode_reg (mode1, op1);
13781 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13783 /* @@@ better error message */
13784 error ("selector must be an immediate");
13788 || GET_MODE (target) != tmode
13789 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13790 target = gen_reg_rtx (tmode);
13791 pat = GEN_FCN (icode) (target, op0, op1, op2);
13797 case IX86_BUILTIN_MASKMOVQ:
13798 case IX86_BUILTIN_MASKMOVDQU:
13799 icode = (fcode == IX86_BUILTIN_MASKMOVQ
13800 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
13801 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
13802 : CODE_FOR_sse2_maskmovdqu));
13803 /* Note the arg order is different from the operand order. */
13804 arg1 = TREE_VALUE (arglist);
13805 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
13806 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13807 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13808 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13809 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13810 mode0 = insn_data[icode].operand[0].mode;
13811 mode1 = insn_data[icode].operand[1].mode;
13812 mode2 = insn_data[icode].operand[2].mode;
13814 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13815 op0 = copy_to_mode_reg (mode0, op0);
13816 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13817 op1 = copy_to_mode_reg (mode1, op1);
13818 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
13819 op2 = copy_to_mode_reg (mode2, op2);
13820 pat = GEN_FCN (icode) (op0, op1, op2);
13826 case IX86_BUILTIN_SQRTSS:
13827 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
13828 case IX86_BUILTIN_RSQRTSS:
13829 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
13830 case IX86_BUILTIN_RCPSS:
13831 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
13833 case IX86_BUILTIN_LOADAPS:
13834 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
13836 case IX86_BUILTIN_LOADUPS:
13837 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
13839 case IX86_BUILTIN_STOREAPS:
13840 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
13842 case IX86_BUILTIN_STOREUPS:
13843 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
13845 case IX86_BUILTIN_LOADSS:
13846 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
13848 case IX86_BUILTIN_STORESS:
13849 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
13851 case IX86_BUILTIN_LOADHPS:
13852 case IX86_BUILTIN_LOADLPS:
13853 case IX86_BUILTIN_LOADHPD:
13854 case IX86_BUILTIN_LOADLPD:
13855 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
13856 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
13857 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
13858 : CODE_FOR_sse2_movlpd);
13859 arg0 = TREE_VALUE (arglist);
13860 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13861 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13862 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13863 tmode = insn_data[icode].operand[0].mode;
13864 mode0 = insn_data[icode].operand[1].mode;
13865 mode1 = insn_data[icode].operand[2].mode;
13867 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13868 op0 = copy_to_mode_reg (mode0, op0);
13869 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
13871 || GET_MODE (target) != tmode
13872 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13873 target = gen_reg_rtx (tmode);
13874 pat = GEN_FCN (icode) (target, op0, op1);
13880 case IX86_BUILTIN_STOREHPS:
13881 case IX86_BUILTIN_STORELPS:
13882 case IX86_BUILTIN_STOREHPD:
13883 case IX86_BUILTIN_STORELPD:
13884 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
13885 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
13886 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
13887 : CODE_FOR_sse2_movlpd);
13888 arg0 = TREE_VALUE (arglist);
13889 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13890 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13891 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13892 mode0 = insn_data[icode].operand[1].mode;
13893 mode1 = insn_data[icode].operand[2].mode;
13895 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13896 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13897 op1 = copy_to_mode_reg (mode1, op1);
13899 pat = GEN_FCN (icode) (op0, op0, op1);
13905 case IX86_BUILTIN_MOVNTPS:
13906 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
13907 case IX86_BUILTIN_MOVNTQ:
13908 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
13910 case IX86_BUILTIN_LDMXCSR:
13911 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13912 target = assign_386_stack_local (SImode, 0);
13913 emit_move_insn (target, op0);
13914 emit_insn (gen_ldmxcsr (target));
13917 case IX86_BUILTIN_STMXCSR:
13918 target = assign_386_stack_local (SImode, 0);
13919 emit_insn (gen_stmxcsr (target));
13920 return copy_to_mode_reg (SImode, target);
13922 case IX86_BUILTIN_SHUFPS:
13923 case IX86_BUILTIN_SHUFPD:
13924 icode = (fcode == IX86_BUILTIN_SHUFPS
13925 ? CODE_FOR_sse_shufps
13926 : CODE_FOR_sse2_shufpd);
13927 arg0 = TREE_VALUE (arglist);
13928 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13929 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13930 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13931 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13932 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13933 tmode = insn_data[icode].operand[0].mode;
13934 mode0 = insn_data[icode].operand[1].mode;
13935 mode1 = insn_data[icode].operand[2].mode;
13936 mode2 = insn_data[icode].operand[3].mode;
13938 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13939 op0 = copy_to_mode_reg (mode0, op0);
13940 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13941 op1 = copy_to_mode_reg (mode1, op1);
13942 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13944 /* @@@ better error message */
13945 error ("mask must be an immediate");
13946 return gen_reg_rtx (tmode);
13949 || GET_MODE (target) != tmode
13950 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13951 target = gen_reg_rtx (tmode);
13952 pat = GEN_FCN (icode) (target, op0, op1, op2);
13958 case IX86_BUILTIN_PSHUFW:
13959 case IX86_BUILTIN_PSHUFD:
13960 case IX86_BUILTIN_PSHUFHW:
13961 case IX86_BUILTIN_PSHUFLW:
13962 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
13963 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
13964 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
13965 : CODE_FOR_mmx_pshufw);
13966 arg0 = TREE_VALUE (arglist);
13967 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13968 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13969 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13970 tmode = insn_data[icode].operand[0].mode;
13971 mode1 = insn_data[icode].operand[1].mode;
13972 mode2 = insn_data[icode].operand[2].mode;
13974 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13975 op0 = copy_to_mode_reg (mode1, op0);
13976 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13978 /* @@@ better error message */
13979 error ("mask must be an immediate");
13983 || GET_MODE (target) != tmode
13984 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13985 target = gen_reg_rtx (tmode);
13986 pat = GEN_FCN (icode) (target, op0, op1);
13992 case IX86_BUILTIN_PSLLDQI128:
13993 case IX86_BUILTIN_PSRLDQI128:
13994 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
13995 : CODE_FOR_sse2_lshrti3);
13996 arg0 = TREE_VALUE (arglist);
13997 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13998 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13999 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14000 tmode = insn_data[icode].operand[0].mode;
14001 mode1 = insn_data[icode].operand[1].mode;
14002 mode2 = insn_data[icode].operand[2].mode;
14004 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14006 op0 = copy_to_reg (op0);
14007 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14009 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14011 error ("shift must be an immediate");
14014 target = gen_reg_rtx (V2DImode);
14015 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14021 case IX86_BUILTIN_FEMMS:
14022 emit_insn (gen_femms ());
14025 case IX86_BUILTIN_PAVGUSB:
14026 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
14028 case IX86_BUILTIN_PF2ID:
14029 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
14031 case IX86_BUILTIN_PFACC:
14032 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
14034 case IX86_BUILTIN_PFADD:
14035 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
14037 case IX86_BUILTIN_PFCMPEQ:
14038 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
14040 case IX86_BUILTIN_PFCMPGE:
14041 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
14043 case IX86_BUILTIN_PFCMPGT:
14044 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
14046 case IX86_BUILTIN_PFMAX:
14047 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
14049 case IX86_BUILTIN_PFMIN:
14050 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
14052 case IX86_BUILTIN_PFMUL:
14053 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
14055 case IX86_BUILTIN_PFRCP:
14056 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
14058 case IX86_BUILTIN_PFRCPIT1:
14059 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
14061 case IX86_BUILTIN_PFRCPIT2:
14062 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
14064 case IX86_BUILTIN_PFRSQIT1:
14065 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
14067 case IX86_BUILTIN_PFRSQRT:
14068 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
14070 case IX86_BUILTIN_PFSUB:
14071 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
14073 case IX86_BUILTIN_PFSUBR:
14074 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
14076 case IX86_BUILTIN_PI2FD:
14077 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
14079 case IX86_BUILTIN_PMULHRW:
14080 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
14082 case IX86_BUILTIN_PF2IW:
14083 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
14085 case IX86_BUILTIN_PFNACC:
14086 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
14088 case IX86_BUILTIN_PFPNACC:
14089 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
14091 case IX86_BUILTIN_PI2FW:
14092 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
14094 case IX86_BUILTIN_PSWAPDSI:
14095 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
14097 case IX86_BUILTIN_PSWAPDSF:
14098 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
14100 case IX86_BUILTIN_SSE_ZERO:
14101 target = gen_reg_rtx (V4SFmode);
14102 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
14105 case IX86_BUILTIN_MMX_ZERO:
14106 target = gen_reg_rtx (DImode);
14107 emit_insn (gen_mmx_clrdi (target));
14110 case IX86_BUILTIN_CLRTI:
14111 target = gen_reg_rtx (V2DImode);
14112 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
14116 case IX86_BUILTIN_SQRTSD:
14117 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14118 case IX86_BUILTIN_LOADAPD:
14119 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14120 case IX86_BUILTIN_LOADUPD:
14121 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14123 case IX86_BUILTIN_STOREAPD:
14124 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14125 case IX86_BUILTIN_STOREUPD:
14126 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14128 case IX86_BUILTIN_LOADSD:
14129 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14131 case IX86_BUILTIN_STORESD:
14132 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
14134 case IX86_BUILTIN_SETPD1:
14135 target = assign_386_stack_local (DFmode, 0);
14136 arg0 = TREE_VALUE (arglist);
14137 emit_move_insn (adjust_address (target, DFmode, 0),
14138 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14139 op0 = gen_reg_rtx (V2DFmode);
14140 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
14141 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
14144 case IX86_BUILTIN_SETPD:
14145 target = assign_386_stack_local (V2DFmode, 0);
14146 arg0 = TREE_VALUE (arglist);
14147 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14148 emit_move_insn (adjust_address (target, DFmode, 0),
14149 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14150 emit_move_insn (adjust_address (target, DFmode, 8),
14151 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14152 op0 = gen_reg_rtx (V2DFmode);
14153 emit_insn (gen_sse2_movapd (op0, target));
14156 case IX86_BUILTIN_LOADRPD:
14157 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14158 gen_reg_rtx (V2DFmode), 1);
14159 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
14162 case IX86_BUILTIN_LOADPD1:
14163 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14164 gen_reg_rtx (V2DFmode), 1);
14165 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14168 case IX86_BUILTIN_STOREPD1:
14169 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14170 case IX86_BUILTIN_STORERPD:
14171 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14173 case IX86_BUILTIN_CLRPD:
14174 target = gen_reg_rtx (V2DFmode);
14175 emit_insn (gen_sse_clrv2df (target));
14178 case IX86_BUILTIN_MFENCE:
14179 emit_insn (gen_sse2_mfence ());
14181 case IX86_BUILTIN_LFENCE:
14182 emit_insn (gen_sse2_lfence ());
14185 case IX86_BUILTIN_CLFLUSH:
14186 arg0 = TREE_VALUE (arglist);
14187 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14188 icode = CODE_FOR_sse2_clflush;
14189 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14190 op0 = copy_to_mode_reg (Pmode, op0);
14192 emit_insn (gen_sse2_clflush (op0));
14195 case IX86_BUILTIN_MOVNTPD:
14196 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14197 case IX86_BUILTIN_MOVNTDQ:
14198 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14199 case IX86_BUILTIN_MOVNTI:
14200 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14202 case IX86_BUILTIN_LOADDQA:
14203 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14204 case IX86_BUILTIN_LOADDQU:
14205 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14206 case IX86_BUILTIN_LOADD:
14207 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14209 case IX86_BUILTIN_STOREDQA:
14210 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14211 case IX86_BUILTIN_STOREDQU:
14212 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14213 case IX86_BUILTIN_STORED:
14214 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
14220 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14221 if (d->code == fcode)
14223 /* Compares are treated specially. */
14224 if (d->icode == CODE_FOR_maskcmpv4sf3
14225 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14226 || d->icode == CODE_FOR_maskncmpv4sf3
14227 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14228 || d->icode == CODE_FOR_maskcmpv2df3
14229 || d->icode == CODE_FOR_vmmaskcmpv2df3
14230 || d->icode == CODE_FOR_maskncmpv2df3
14231 || d->icode == CODE_FOR_vmmaskncmpv2df3)
14232 return ix86_expand_sse_compare (d, arglist, target);
14234 return ix86_expand_binop_builtin (d->icode, arglist, target);
14237 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14238 if (d->code == fcode)
14239 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14241 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14242 if (d->code == fcode)
14243 return ix86_expand_sse_comi (d, arglist, target);
14245 /* @@@ Should really do something sensible here. */
14249 /* Store OPERAND to the memory after reload is completed. This means
14250 that we can't easily use assign_stack_local. */
14252 ix86_force_to_memory (mode, operand)
14253 enum machine_mode mode;
14257 if (!reload_completed)
14259 if (TARGET_64BIT && TARGET_RED_ZONE)
14261 result = gen_rtx_MEM (mode,
14262 gen_rtx_PLUS (Pmode,
14264 GEN_INT (-RED_ZONE_SIZE)));
14265 emit_move_insn (result, operand);
14267 else if (TARGET_64BIT && !TARGET_RED_ZONE)
14273 operand = gen_lowpart (DImode, operand);
14277 gen_rtx_SET (VOIDmode,
14278 gen_rtx_MEM (DImode,
14279 gen_rtx_PRE_DEC (DImode,
14280 stack_pointer_rtx)),
14286 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14295 split_di (&operand, 1, operands, operands + 1);
14297 gen_rtx_SET (VOIDmode,
14298 gen_rtx_MEM (SImode,
14299 gen_rtx_PRE_DEC (Pmode,
14300 stack_pointer_rtx)),
14303 gen_rtx_SET (VOIDmode,
14304 gen_rtx_MEM (SImode,
14305 gen_rtx_PRE_DEC (Pmode,
14306 stack_pointer_rtx)),
14311 /* It is better to store HImodes as SImodes. */
14312 if (!TARGET_PARTIAL_REG_STALL)
14313 operand = gen_lowpart (SImode, operand);
14317 gen_rtx_SET (VOIDmode,
14318 gen_rtx_MEM (GET_MODE (operand),
14319 gen_rtx_PRE_DEC (SImode,
14320 stack_pointer_rtx)),
14326 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14331 /* Free operand from the memory. */
14333 ix86_free_from_memory (mode)
14334 enum machine_mode mode;
14336 if (!TARGET_64BIT || !TARGET_RED_ZONE)
14340 if (mode == DImode || TARGET_64BIT)
14342 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14346 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14347 to pop or add instruction if registers are available. */
14348 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14349 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14354 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14355 QImode must go into class Q_REGS.
14356 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
14357 movdf to do mem-to-mem moves through integer regs. */
14359 ix86_preferred_reload_class (x, class)
14361 enum reg_class class;
14363 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14365 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14367 /* SSE can't load any constant directly yet. */
14368 if (SSE_CLASS_P (class))
14370 /* Floats can load 0 and 1. */
14371 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14373 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14374 if (MAYBE_SSE_CLASS_P (class))
14375 return (reg_class_subset_p (class, GENERAL_REGS)
14376 ? GENERAL_REGS : FLOAT_REGS);
14380 /* General regs can load everything. */
14381 if (reg_class_subset_p (class, GENERAL_REGS))
14382 return GENERAL_REGS;
14383 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14384 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14387 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14389 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14394 /* If we are copying between general and FP registers, we need a memory
14395 location. The same is true for SSE and MMX registers.
14397 The macro can't work reliably when one of the CLASSES is class containing
14398 registers from multiple units (SSE, MMX, integer). We avoid this by never
14399 combining those units in single alternative in the machine description.
14400 Ensure that this constraint holds to avoid unexpected surprises.
14402 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14403 enforce these sanity checks. */
14405 ix86_secondary_memory_needed (class1, class2, mode, strict)
14406 enum reg_class class1, class2;
14407 enum machine_mode mode;
14410 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14411 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14412 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14413 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14414 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14415 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14422 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14423 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14424 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14425 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14426 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
14428 /* Return the cost of moving data from a register in class CLASS1 to
14429 one in class CLASS2.
14431 It is not required that the cost always equal 2 when FROM is the same as TO;
14432 on some machines it is expensive to move between registers if they are not
14433 general registers. */
14435 ix86_register_move_cost (mode, class1, class2)
14436 enum machine_mode mode;
14437 enum reg_class class1, class2;
14439 /* In case we require secondary memory, compute cost of the store followed
14440 by load. In order to avoid bad register allocation choices, we need
14441 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14443 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14447 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14448 MEMORY_MOVE_COST (mode, class1, 1));
14449 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14450 MEMORY_MOVE_COST (mode, class2, 1));
14452 /* In case of copying from general_purpose_register we may emit multiple
14453 stores followed by single load causing memory size mismatch stall.
14454 Count this as arbitrarily high cost of 20. */
14455 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14458 /* In the case of FP/MMX moves, the registers actually overlap, and we
14459 have to switch modes in order to treat them differently. */
14460 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14461 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14467 /* Moves between SSE/MMX and integer unit are expensive. */
14468 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14469 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14470 return ix86_cost->mmxsse_to_integer;
14471 if (MAYBE_FLOAT_CLASS_P (class1))
14472 return ix86_cost->fp_move;
14473 if (MAYBE_SSE_CLASS_P (class1))
14474 return ix86_cost->sse_move;
14475 if (MAYBE_MMX_CLASS_P (class1))
14476 return ix86_cost->mmx_move;
14480 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14482 ix86_hard_regno_mode_ok (regno, mode)
14484 enum machine_mode mode;
14486 /* Flags and only flags can only hold CCmode values. */
14487 if (CC_REGNO_P (regno))
14488 return GET_MODE_CLASS (mode) == MODE_CC;
14489 if (GET_MODE_CLASS (mode) == MODE_CC
14490 || GET_MODE_CLASS (mode) == MODE_RANDOM
14491 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14493 if (FP_REGNO_P (regno))
14494 return VALID_FP_MODE_P (mode);
14495 if (SSE_REGNO_P (regno))
14496 return VALID_SSE_REG_MODE (mode);
14497 if (MMX_REGNO_P (regno))
14498 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
14499 /* We handle both integer and floats in the general purpose registers.
14500 In future we should be able to handle vector modes as well. */
14501 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14503 /* Take care for QImode values - they can be in non-QI regs, but then
14504 they do cause partial register stalls. */
14505 if (regno < 4 || mode != QImode || TARGET_64BIT)
14507 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14510 /* Return the cost of moving data of mode M between a
14511 register and memory. A value of 2 is the default; this cost is
14512 relative to those in `REGISTER_MOVE_COST'.
14514 If moving between registers and memory is more expensive than
14515 between two registers, you should define this macro to express the
14518 Model also increased moving costs of QImode registers in non
14522 ix86_memory_move_cost (mode, class, in)
14523 enum machine_mode mode;
14524 enum reg_class class;
14527 if (FLOAT_CLASS_P (class))
14545 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14547 if (SSE_CLASS_P (class))
14550 switch (GET_MODE_SIZE (mode))
14564 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14566 if (MMX_CLASS_P (class))
14569 switch (GET_MODE_SIZE (mode))
14580 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14582 switch (GET_MODE_SIZE (mode))
14586 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14587 : ix86_cost->movzbl_load);
14589 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14590 : ix86_cost->int_store[0] + 4);
14593 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14595 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14596 if (mode == TFmode)
14598 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14599 * ((int) GET_MODE_SIZE (mode)
14600 + UNITS_PER_WORD -1 ) / UNITS_PER_WORD);
14604 /* Compute a (partial) cost for rtx X. Return true if the complete
14605 cost has been computed, and false if subexpressions should be
14606 scanned. In either case, *TOTAL contains the cost result. */
14609 ix86_rtx_costs (x, code, outer_code, total)
14611 int code, outer_code;
14614 enum machine_mode mode = GET_MODE (x);
14622 if (TARGET_64BIT && !x86_64_sign_extended_value (x))
14624 else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
14626 else if (flag_pic && SYMBOLIC_CONST (x))
14633 if (mode == VOIDmode)
14636 switch (standard_80387_constant_p (x))
14645 /* Start with (MEM (SYMBOL_REF)), since that's where
14646 it'll probably end up. Add a penalty for size. */
14647 *total = (COSTS_N_INSNS (1)
14649 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
14655 /* The zero extensions is often completely free on x86_64, so make
14656 it as cheap as possible. */
14657 if (TARGET_64BIT && mode == DImode
14658 && GET_MODE (XEXP (x, 0)) == SImode)
14660 else if (TARGET_ZERO_EXTEND_WITH_AND)
14661 *total = COSTS_N_INSNS (ix86_cost->add);
14663 *total = COSTS_N_INSNS (ix86_cost->movzx);
14667 *total = COSTS_N_INSNS (ix86_cost->movsx);
14671 if (GET_CODE (XEXP (x, 1)) == CONST_INT
14672 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
14674 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14677 *total = COSTS_N_INSNS (ix86_cost->add);
14680 if ((value == 2 || value == 3)
14681 && !TARGET_DECOMPOSE_LEA
14682 && ix86_cost->lea <= ix86_cost->shift_const)
14684 *total = COSTS_N_INSNS (ix86_cost->lea);
14694 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
14696 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14698 if (INTVAL (XEXP (x, 1)) > 32)
14699 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
14701 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
14705 if (GET_CODE (XEXP (x, 1)) == AND)
14706 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
14708 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
14713 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14714 *total = COSTS_N_INSNS (ix86_cost->shift_const);
14716 *total = COSTS_N_INSNS (ix86_cost->shift_var);
14721 if (FLOAT_MODE_P (mode))
14722 *total = COSTS_N_INSNS (ix86_cost->fmul);
14723 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14725 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14728 for (nbits = 0; value != 0; value >>= 1)
14731 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
14732 + nbits * ix86_cost->mult_bit);
14736 /* This is arbitrary */
14737 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
14738 + 7 * ix86_cost->mult_bit);
14746 if (FLOAT_MODE_P (mode))
14747 *total = COSTS_N_INSNS (ix86_cost->fdiv);
14749 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
14753 if (FLOAT_MODE_P (mode))
14754 *total = COSTS_N_INSNS (ix86_cost->fadd);
14755 else if (!TARGET_DECOMPOSE_LEA
14756 && GET_MODE_CLASS (mode) == MODE_INT
14757 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
14759 if (GET_CODE (XEXP (x, 0)) == PLUS
14760 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14761 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
14762 && CONSTANT_P (XEXP (x, 1)))
14764 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
14765 if (val == 2 || val == 4 || val == 8)
14767 *total = COSTS_N_INSNS (ix86_cost->lea);
14768 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
14769 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
14771 *total += rtx_cost (XEXP (x, 1), outer_code);
14775 else if (GET_CODE (XEXP (x, 0)) == MULT
14776 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
14778 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
14779 if (val == 2 || val == 4 || val == 8)
14781 *total = COSTS_N_INSNS (ix86_cost->lea);
14782 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
14783 *total += rtx_cost (XEXP (x, 1), outer_code);
14787 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14789 *total = COSTS_N_INSNS (ix86_cost->lea);
14790 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
14791 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
14792 *total += rtx_cost (XEXP (x, 1), outer_code);
14799 if (FLOAT_MODE_P (mode))
14801 *total = COSTS_N_INSNS (ix86_cost->fadd);
14809 if (!TARGET_64BIT && mode == DImode)
14811 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
14812 + (rtx_cost (XEXP (x, 0), outer_code)
14813 << (GET_MODE (XEXP (x, 0)) != DImode))
14814 + (rtx_cost (XEXP (x, 1), outer_code)
14815 << (GET_MODE (XEXP (x, 1)) != DImode)));
14821 if (FLOAT_MODE_P (mode))
14823 *total = COSTS_N_INSNS (ix86_cost->fchs);
14829 if (!TARGET_64BIT && mode == DImode)
14830 *total = COSTS_N_INSNS (ix86_cost->add * 2);
14832 *total = COSTS_N_INSNS (ix86_cost->add);
14836 if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
14841 if (FLOAT_MODE_P (mode))
14842 *total = COSTS_N_INSNS (ix86_cost->fabs);
14846 if (FLOAT_MODE_P (mode))
14847 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
14855 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
14857 ix86_svr3_asm_out_constructor (symbol, priority)
14859 int priority ATTRIBUTE_UNUSED;
14862 fputs ("\tpushl $", asm_out_file);
14863 assemble_name (asm_out_file, XSTR (symbol, 0));
14864 fputc ('\n', asm_out_file);
14870 static int current_machopic_label_num;
14872 /* Given a symbol name and its associated stub, write out the
14873 definition of the stub. */
14876 machopic_output_stub (file, symb, stub)
14878 const char *symb, *stub;
14880 unsigned int length;
14881 char *binder_name, *symbol_name, lazy_ptr_name[32];
14882 int label = ++current_machopic_label_num;
14884 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
14885 symb = (*targetm.strip_name_encoding) (symb);
14887 length = strlen (stub);
14888 binder_name = alloca (length + 32);
14889 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
14891 length = strlen (symb);
14892 symbol_name = alloca (length + 32);
14893 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
14895 sprintf (lazy_ptr_name, "L%d$lz", label);
14898 machopic_picsymbol_stub_section ();
14900 machopic_symbol_stub_section ();
14902 fprintf (file, "%s:\n", stub);
14903 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14907 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
14908 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
14909 fprintf (file, "\tjmp %%edx\n");
14912 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
14914 fprintf (file, "%s:\n", binder_name);
14918 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
14919 fprintf (file, "\tpushl %%eax\n");
14922 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
14924 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
14926 machopic_lazy_symbol_ptr_section ();
14927 fprintf (file, "%s:\n", lazy_ptr_name);
14928 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14929 fprintf (file, "\t.long %s\n", binder_name);
14931 #endif /* TARGET_MACHO */
14933 /* Order the registers for register allocator. */
14936 x86_order_regs_for_local_alloc ()
14941 /* First allocate the local general purpose registers. */
14942 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14943 if (GENERAL_REGNO_P (i) && call_used_regs[i])
14944 reg_alloc_order [pos++] = i;
14946 /* Global general purpose registers. */
14947 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14948 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
14949 reg_alloc_order [pos++] = i;
14951 /* x87 registers come first in case we are doing FP math
14953 if (!TARGET_SSE_MATH)
14954 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14955 reg_alloc_order [pos++] = i;
14957 /* SSE registers. */
14958 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
14959 reg_alloc_order [pos++] = i;
14960 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
14961 reg_alloc_order [pos++] = i;
14963 /* x87 registers. */
14964 if (TARGET_SSE_MATH)
14965 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14966 reg_alloc_order [pos++] = i;
14968 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
14969 reg_alloc_order [pos++] = i;
14971 /* Initialize the rest of array as we do not allocate some registers
14973 while (pos < FIRST_PSEUDO_REGISTER)
14974 reg_alloc_order [pos++] = 0;
14977 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
14978 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
14981 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
14982 struct attribute_spec.handler. */
14984 ix86_handle_struct_attribute (node, name, args, flags, no_add_attrs)
14987 tree args ATTRIBUTE_UNUSED;
14988 int flags ATTRIBUTE_UNUSED;
14989 bool *no_add_attrs;
14992 if (DECL_P (*node))
14994 if (TREE_CODE (*node) == TYPE_DECL)
14995 type = &TREE_TYPE (*node);
15000 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15001 || TREE_CODE (*type) == UNION_TYPE)))
15003 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
15004 *no_add_attrs = true;
15007 else if ((is_attribute_p ("ms_struct", name)
15008 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15009 || ((is_attribute_p ("gcc_struct", name)
15010 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15012 warning ("`%s' incompatible attribute ignored",
15013 IDENTIFIER_POINTER (name));
15014 *no_add_attrs = true;
15021 ix86_ms_bitfield_layout_p (record_type)
15024 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
15025 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
15026 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
15029 /* Returns an expression indicating where the this parameter is
15030 located on entry to the FUNCTION. */
15033 x86_this_parameter (function)
15036 tree type = TREE_TYPE (function);
15040 int n = aggregate_value_p (TREE_TYPE (type)) != 0;
15041 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
15044 if (ix86_fntype_regparm (type) > 0)
15048 parm = TYPE_ARG_TYPES (type);
15049 /* Figure out whether or not the function has a variable number of
15051 for (; parm; parm = TREE_CHAIN (parm))
15052 if (TREE_VALUE (parm) == void_type_node)
15054 /* If not, the this parameter is in %eax. */
15056 return gen_rtx_REG (SImode, 0);
15059 if (aggregate_value_p (TREE_TYPE (type)))
15060 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
15062 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
15065 /* Determine whether x86_output_mi_thunk can succeed. */
15068 x86_can_output_mi_thunk (thunk, delta, vcall_offset, function)
15069 tree thunk ATTRIBUTE_UNUSED;
15070 HOST_WIDE_INT delta ATTRIBUTE_UNUSED;
15071 HOST_WIDE_INT vcall_offset;
15074 /* 64-bit can handle anything. */
15078 /* For 32-bit, everything's fine if we have one free register. */
15079 if (ix86_fntype_regparm (TREE_TYPE (function)) < 3)
15082 /* Need a free register for vcall_offset. */
15086 /* Need a free register for GOT references. */
15087 if (flag_pic && !(*targetm.binds_local_p) (function))
15090 /* Otherwise ok. */
15094 /* Output the assembler code for a thunk function. THUNK_DECL is the
15095 declaration for the thunk function itself, FUNCTION is the decl for
15096 the target function. DELTA is an immediate constant offset to be
15097 added to THIS. If VCALL_OFFSET is nonzero, the word at
15098 *(*this + vcall_offset) should be added to THIS. */
15101 x86_output_mi_thunk (file, thunk, delta, vcall_offset, function)
15102 FILE *file ATTRIBUTE_UNUSED;
15103 tree thunk ATTRIBUTE_UNUSED;
15104 HOST_WIDE_INT delta;
15105 HOST_WIDE_INT vcall_offset;
15109 rtx this = x86_this_parameter (function);
15112 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15113 pull it in now and let DELTA benefit. */
15116 else if (vcall_offset)
15118 /* Put the this parameter into %eax. */
15120 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15121 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15124 this_reg = NULL_RTX;
15126 /* Adjust the this parameter by a fixed constant. */
15129 xops[0] = GEN_INT (delta);
15130 xops[1] = this_reg ? this_reg : this;
15133 if (!x86_64_general_operand (xops[0], DImode))
15135 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15137 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15141 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15144 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15147 /* Adjust the this parameter by a value stored in the vtable. */
15151 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15153 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15155 xops[0] = gen_rtx_MEM (Pmode, this_reg);
15158 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15160 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15162 /* Adjust the this parameter. */
15163 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
15164 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15166 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15167 xops[0] = GEN_INT (vcall_offset);
15169 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15170 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
15172 xops[1] = this_reg;
15174 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15176 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15179 /* If necessary, drop THIS back to its stack slot. */
15180 if (this_reg && this_reg != this)
15182 xops[0] = this_reg;
15184 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15187 xops[0] = DECL_RTL (function);
15190 if (!flag_pic || (*targetm.binds_local_p) (function))
15191 output_asm_insn ("jmp\t%P0", xops);
15194 tmp = XEXP (xops[0], 0);
15195 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, tmp), UNSPEC_GOTPCREL);
15196 tmp = gen_rtx_CONST (Pmode, tmp);
15197 tmp = gen_rtx_MEM (QImode, tmp);
15199 output_asm_insn ("jmp\t%A0", xops);
15204 if (!flag_pic || (*targetm.binds_local_p) (function))
15205 output_asm_insn ("jmp\t%P0", xops);
15210 char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
15211 tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
15212 tmp = gen_rtx_MEM (QImode, tmp);
15214 output_asm_insn ("jmp\t%0", xops);
15217 #endif /* TARGET_MACHO */
15219 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15220 output_set_got (tmp);
15223 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15224 output_asm_insn ("jmp\t{*}%1", xops);
15230 x86_field_alignment (field, computed)
15234 enum machine_mode mode;
15235 tree type = TREE_TYPE (field);
15237 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
15239 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15240 ? get_inner_array_type (type) : type);
15241 if (mode == DFmode || mode == DCmode
15242 || GET_MODE_CLASS (mode) == MODE_INT
15243 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
15244 return MIN (32, computed);
15248 /* Output assembler code to FILE to increment profiler label # LABELNO
15249 for profiling a function entry. */
15251 x86_function_profiler (file, labelno)
15253 int labelno ATTRIBUTE_UNUSED;
15258 #ifndef NO_PROFILE_COUNTERS
15259 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
15261 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
15265 #ifndef NO_PROFILE_COUNTERS
15266 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15268 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15272 #ifndef NO_PROFILE_COUNTERS
15273 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15274 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15276 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
15280 #ifndef NO_PROFILE_COUNTERS
15281 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
15282 PROFILE_COUNT_REGISTER);
15284 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15288 /* Implement machine specific optimizations.
15289 At the moment we implement single transformation: AMD Athlon works faster
15290 when RET is not destination of conditional jump or directly preceded
15291 by other jump instruction. We avoid the penalty by inserting NOP just
15292 before the RET instructions in such cases. */
15294 x86_machine_dependent_reorg (first)
15295 rtx first ATTRIBUTE_UNUSED;
15299 if (!TARGET_ATHLON_K8 || !optimize || optimize_size)
15301 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
15303 basic_block bb = e->src;
15306 bool insert = false;
15308 if (!returnjump_p (ret) || !maybe_hot_bb_p (bb))
15310 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15311 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
15313 if (prev && GET_CODE (prev) == CODE_LABEL)
15316 for (e = bb->pred; e; e = e->pred_next)
15317 if (EDGE_FREQUENCY (e) && e->src->index >= 0
15318 && !(e->flags & EDGE_FALLTHRU))
15323 prev = prev_active_insn (ret);
15324 if (prev && GET_CODE (prev) == JUMP_INSN
15325 && any_condjump_p (prev))
15327 /* Empty functions get branch misspredict even when the jump destination
15328 is not visible to us. */
15329 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
15333 emit_insn_before (gen_nop (), ret);
15337 /* Return nonzero when QImode register that must be represented via REX prefix
15340 x86_extended_QIreg_mentioned_p (insn)
15344 extract_insn_cached (insn);
15345 for (i = 0; i < recog_data.n_operands; i++)
15346 if (REG_P (recog_data.operand[i])
15347 && REGNO (recog_data.operand[i]) >= 4)
15352 /* Return nonzero when P points to register encoded via REX prefix.
15353 Called via for_each_rtx. */
15355 extended_reg_mentioned_1 (p, data)
15357 void *data ATTRIBUTE_UNUSED;
15359 unsigned int regno;
15362 regno = REGNO (*p);
15363 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15366 /* Return true when INSN mentions register that must be encoded using REX
15369 x86_extended_reg_mentioned_p (insn)
15372 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15375 /* Generate an unsigned DImode to FP conversion. This is the same code
15376 optabs would emit if we didn't have TFmode patterns. */
15379 x86_emit_floatuns (operands)
15382 rtx neglab, donelab, i0, i1, f0, in, out;
15383 enum machine_mode mode;
15386 in = force_reg (DImode, operands[1]);
15387 mode = GET_MODE (out);
15388 neglab = gen_label_rtx ();
15389 donelab = gen_label_rtx ();
15390 i1 = gen_reg_rtx (Pmode);
15391 f0 = gen_reg_rtx (mode);
15393 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
15395 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15396 emit_jump_insn (gen_jump (donelab));
15399 emit_label (neglab);
15401 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15402 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15403 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15404 expand_float (f0, i0, 0);
15405 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15407 emit_label (donelab);
15410 #include "gt-i386.h"