1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option) any later version.
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-attr.h"
43 #include "basic-block.h"
46 #include "target-def.h"
47 #include "langhooks.h"
49 #ifndef CHECK_STACK_LIMIT
50 #define CHECK_STACK_LIMIT (-1)
53 /* Return index of given mode in mult and division cost tables. */
54 #define MODE_INDEX(mode) \
55 ((mode) == QImode ? 0 \
56 : (mode) == HImode ? 1 \
57 : (mode) == SImode ? 2 \
58 : (mode) == DImode ? 3 \
61 /* Processor costs (relative to an add) */
63 struct processor_costs size_cost = { /* costs for tunning for size */
64 2, /* cost of an add instruction */
65 3, /* cost of a lea instruction */
66 2, /* variable shift costs */
67 3, /* constant shift costs */
68 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
69 0, /* cost of multiply per each bit set */
70 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
71 3, /* cost of movsx */
72 3, /* cost of movzx */
75 2, /* cost for loading QImode using movzbl */
76 {2, 2, 2}, /* cost of loading integer registers
77 in QImode, HImode and SImode.
78 Relative to reg-reg move (2). */
79 {2, 2, 2}, /* cost of storing integer registers */
80 2, /* cost of reg,reg fld/fst */
81 {2, 2, 2}, /* cost of loading fp registers
82 in SFmode, DFmode and XFmode */
83 {2, 2, 2}, /* cost of loading integer registers */
84 3, /* cost of moving MMX register */
85 {3, 3}, /* cost of loading MMX registers
86 in SImode and DImode */
87 {3, 3}, /* cost of storing MMX registers
88 in SImode and DImode */
89 3, /* cost of moving SSE register */
90 {3, 3, 3}, /* cost of loading SSE registers
91 in SImode, DImode and TImode */
92 {3, 3, 3}, /* cost of storing SSE registers
93 in SImode, DImode and TImode */
94 3, /* MMX or SSE register to integer */
95 0, /* size of prefetch block */
96 0, /* number of parallel prefetches */
98 2, /* cost of FADD and FSUB insns. */
99 2, /* cost of FMUL instruction. */
100 2, /* cost of FDIV instruction. */
101 2, /* cost of FABS instruction. */
102 2, /* cost of FCHS instruction. */
103 2, /* cost of FSQRT instruction. */
106 /* Processor costs (relative to an add) */
108 struct processor_costs i386_cost = { /* 386 specific costs */
109 1, /* cost of an add instruction */
110 1, /* cost of a lea instruction */
111 3, /* variable shift costs */
112 2, /* constant shift costs */
113 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
114 1, /* cost of multiply per each bit set */
115 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
116 3, /* cost of movsx */
117 2, /* cost of movzx */
118 15, /* "large" insn */
120 4, /* cost for loading QImode using movzbl */
121 {2, 4, 2}, /* cost of loading integer registers
122 in QImode, HImode and SImode.
123 Relative to reg-reg move (2). */
124 {2, 4, 2}, /* cost of storing integer registers */
125 2, /* cost of reg,reg fld/fst */
126 {8, 8, 8}, /* cost of loading fp registers
127 in SFmode, DFmode and XFmode */
128 {8, 8, 8}, /* cost of loading integer registers */
129 2, /* cost of moving MMX register */
130 {4, 8}, /* cost of loading MMX registers
131 in SImode and DImode */
132 {4, 8}, /* cost of storing MMX registers
133 in SImode and DImode */
134 2, /* cost of moving SSE register */
135 {4, 8, 16}, /* cost of loading SSE registers
136 in SImode, DImode and TImode */
137 {4, 8, 16}, /* cost of storing SSE registers
138 in SImode, DImode and TImode */
139 3, /* MMX or SSE register to integer */
140 0, /* size of prefetch block */
141 0, /* number of parallel prefetches */
143 23, /* cost of FADD and FSUB insns. */
144 27, /* cost of FMUL instruction. */
145 88, /* cost of FDIV instruction. */
146 22, /* cost of FABS instruction. */
147 24, /* cost of FCHS instruction. */
148 122, /* cost of FSQRT instruction. */
152 struct processor_costs i486_cost = { /* 486 specific costs */
153 1, /* cost of an add instruction */
154 1, /* cost of a lea instruction */
155 3, /* variable shift costs */
156 2, /* constant shift costs */
157 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
158 1, /* cost of multiply per each bit set */
159 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
160 3, /* cost of movsx */
161 2, /* cost of movzx */
162 15, /* "large" insn */
164 4, /* cost for loading QImode using movzbl */
165 {2, 4, 2}, /* cost of loading integer registers
166 in QImode, HImode and SImode.
167 Relative to reg-reg move (2). */
168 {2, 4, 2}, /* cost of storing integer registers */
169 2, /* cost of reg,reg fld/fst */
170 {8, 8, 8}, /* cost of loading fp registers
171 in SFmode, DFmode and XFmode */
172 {8, 8, 8}, /* cost of loading integer registers */
173 2, /* cost of moving MMX register */
174 {4, 8}, /* cost of loading MMX registers
175 in SImode and DImode */
176 {4, 8}, /* cost of storing MMX registers
177 in SImode and DImode */
178 2, /* cost of moving SSE register */
179 {4, 8, 16}, /* cost of loading SSE registers
180 in SImode, DImode and TImode */
181 {4, 8, 16}, /* cost of storing SSE registers
182 in SImode, DImode and TImode */
183 3, /* MMX or SSE register to integer */
184 0, /* size of prefetch block */
185 0, /* number of parallel prefetches */
187 8, /* cost of FADD and FSUB insns. */
188 16, /* cost of FMUL instruction. */
189 73, /* cost of FDIV instruction. */
190 3, /* cost of FABS instruction. */
191 3, /* cost of FCHS instruction. */
192 83, /* cost of FSQRT instruction. */
196 struct processor_costs pentium_cost = {
197 1, /* cost of an add instruction */
198 1, /* cost of a lea instruction */
199 4, /* variable shift costs */
200 1, /* constant shift costs */
201 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
202 0, /* cost of multiply per each bit set */
203 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
204 3, /* cost of movsx */
205 2, /* cost of movzx */
206 8, /* "large" insn */
208 6, /* cost for loading QImode using movzbl */
209 {2, 4, 2}, /* cost of loading integer registers
210 in QImode, HImode and SImode.
211 Relative to reg-reg move (2). */
212 {2, 4, 2}, /* cost of storing integer registers */
213 2, /* cost of reg,reg fld/fst */
214 {2, 2, 6}, /* cost of loading fp registers
215 in SFmode, DFmode and XFmode */
216 {4, 4, 6}, /* cost of loading integer registers */
217 8, /* cost of moving MMX register */
218 {8, 8}, /* cost of loading MMX registers
219 in SImode and DImode */
220 {8, 8}, /* cost of storing MMX registers
221 in SImode and DImode */
222 2, /* cost of moving SSE register */
223 {4, 8, 16}, /* cost of loading SSE registers
224 in SImode, DImode and TImode */
225 {4, 8, 16}, /* cost of storing SSE registers
226 in SImode, DImode and TImode */
227 3, /* MMX or SSE register to integer */
228 0, /* size of prefetch block */
229 0, /* number of parallel prefetches */
231 3, /* cost of FADD and FSUB insns. */
232 3, /* cost of FMUL instruction. */
233 39, /* cost of FDIV instruction. */
234 1, /* cost of FABS instruction. */
235 1, /* cost of FCHS instruction. */
236 70, /* cost of FSQRT instruction. */
240 struct processor_costs pentiumpro_cost = {
241 1, /* cost of an add instruction */
242 1, /* cost of a lea instruction */
243 1, /* variable shift costs */
244 1, /* constant shift costs */
245 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
246 0, /* cost of multiply per each bit set */
247 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
248 1, /* cost of movsx */
249 1, /* cost of movzx */
250 8, /* "large" insn */
252 2, /* cost for loading QImode using movzbl */
253 {4, 4, 4}, /* cost of loading integer registers
254 in QImode, HImode and SImode.
255 Relative to reg-reg move (2). */
256 {2, 2, 2}, /* cost of storing integer registers */
257 2, /* cost of reg,reg fld/fst */
258 {2, 2, 6}, /* cost of loading fp registers
259 in SFmode, DFmode and XFmode */
260 {4, 4, 6}, /* cost of loading integer registers */
261 2, /* cost of moving MMX register */
262 {2, 2}, /* cost of loading MMX registers
263 in SImode and DImode */
264 {2, 2}, /* cost of storing MMX registers
265 in SImode and DImode */
266 2, /* cost of moving SSE register */
267 {2, 2, 8}, /* cost of loading SSE registers
268 in SImode, DImode and TImode */
269 {2, 2, 8}, /* cost of storing SSE registers
270 in SImode, DImode and TImode */
271 3, /* MMX or SSE register to integer */
272 32, /* size of prefetch block */
273 6, /* number of parallel prefetches */
275 3, /* cost of FADD and FSUB insns. */
276 5, /* cost of FMUL instruction. */
277 56, /* cost of FDIV instruction. */
278 2, /* cost of FABS instruction. */
279 2, /* cost of FCHS instruction. */
280 56, /* cost of FSQRT instruction. */
284 struct processor_costs k6_cost = {
285 1, /* cost of an add instruction */
286 2, /* cost of a lea instruction */
287 1, /* variable shift costs */
288 1, /* constant shift costs */
289 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
290 0, /* cost of multiply per each bit set */
291 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
292 2, /* cost of movsx */
293 2, /* cost of movzx */
294 8, /* "large" insn */
296 3, /* cost for loading QImode using movzbl */
297 {4, 5, 4}, /* cost of loading integer registers
298 in QImode, HImode and SImode.
299 Relative to reg-reg move (2). */
300 {2, 3, 2}, /* cost of storing integer registers */
301 4, /* cost of reg,reg fld/fst */
302 {6, 6, 6}, /* cost of loading fp registers
303 in SFmode, DFmode and XFmode */
304 {4, 4, 4}, /* cost of loading integer registers */
305 2, /* cost of moving MMX register */
306 {2, 2}, /* cost of loading MMX registers
307 in SImode and DImode */
308 {2, 2}, /* cost of storing MMX registers
309 in SImode and DImode */
310 2, /* cost of moving SSE register */
311 {2, 2, 8}, /* cost of loading SSE registers
312 in SImode, DImode and TImode */
313 {2, 2, 8}, /* cost of storing SSE registers
314 in SImode, DImode and TImode */
315 6, /* MMX or SSE register to integer */
316 32, /* size of prefetch block */
317 1, /* number of parallel prefetches */
319 2, /* cost of FADD and FSUB insns. */
320 2, /* cost of FMUL instruction. */
321 56, /* cost of FDIV instruction. */
322 2, /* cost of FABS instruction. */
323 2, /* cost of FCHS instruction. */
324 56, /* cost of FSQRT instruction. */
328 struct processor_costs athlon_cost = {
329 1, /* cost of an add instruction */
330 2, /* cost of a lea instruction */
331 1, /* variable shift costs */
332 1, /* constant shift costs */
333 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
334 0, /* cost of multiply per each bit set */
335 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
336 1, /* cost of movsx */
337 1, /* cost of movzx */
338 8, /* "large" insn */
340 4, /* cost for loading QImode using movzbl */
341 {3, 4, 3}, /* cost of loading integer registers
342 in QImode, HImode and SImode.
343 Relative to reg-reg move (2). */
344 {3, 4, 3}, /* cost of storing integer registers */
345 4, /* cost of reg,reg fld/fst */
346 {4, 4, 12}, /* cost of loading fp registers
347 in SFmode, DFmode and XFmode */
348 {6, 6, 8}, /* cost of loading integer registers */
349 2, /* cost of moving MMX register */
350 {4, 4}, /* cost of loading MMX registers
351 in SImode and DImode */
352 {4, 4}, /* cost of storing MMX registers
353 in SImode and DImode */
354 2, /* cost of moving SSE register */
355 {4, 4, 6}, /* cost of loading SSE registers
356 in SImode, DImode and TImode */
357 {4, 4, 5}, /* cost of storing SSE registers
358 in SImode, DImode and TImode */
359 5, /* MMX or SSE register to integer */
360 64, /* size of prefetch block */
361 6, /* number of parallel prefetches */
363 4, /* cost of FADD and FSUB insns. */
364 4, /* cost of FMUL instruction. */
365 24, /* cost of FDIV instruction. */
366 2, /* cost of FABS instruction. */
367 2, /* cost of FCHS instruction. */
368 35, /* cost of FSQRT instruction. */
372 struct processor_costs k8_cost = {
373 1, /* cost of an add instruction */
374 2, /* cost of a lea instruction */
375 1, /* variable shift costs */
376 1, /* constant shift costs */
377 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
378 0, /* cost of multiply per each bit set */
379 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
380 1, /* cost of movsx */
381 1, /* cost of movzx */
382 8, /* "large" insn */
384 4, /* cost for loading QImode using movzbl */
385 {3, 4, 3}, /* cost of loading integer registers
386 in QImode, HImode and SImode.
387 Relative to reg-reg move (2). */
388 {3, 4, 3}, /* cost of storing integer registers */
389 4, /* cost of reg,reg fld/fst */
390 {4, 4, 12}, /* cost of loading fp registers
391 in SFmode, DFmode and XFmode */
392 {6, 6, 8}, /* cost of loading integer registers */
393 2, /* cost of moving MMX register */
394 {3, 3}, /* cost of loading MMX registers
395 in SImode and DImode */
396 {4, 4}, /* cost of storing MMX registers
397 in SImode and DImode */
398 2, /* cost of moving SSE register */
399 {4, 3, 6}, /* cost of loading SSE registers
400 in SImode, DImode and TImode */
401 {4, 4, 5}, /* cost of storing SSE registers
402 in SImode, DImode and TImode */
403 5, /* MMX or SSE register to integer */
404 64, /* size of prefetch block */
405 6, /* number of parallel prefetches */
407 4, /* cost of FADD and FSUB insns. */
408 4, /* cost of FMUL instruction. */
409 19, /* cost of FDIV instruction. */
410 2, /* cost of FABS instruction. */
411 2, /* cost of FCHS instruction. */
412 35, /* cost of FSQRT instruction. */
416 struct processor_costs pentium4_cost = {
417 1, /* cost of an add instruction */
418 1, /* cost of a lea instruction */
419 4, /* variable shift costs */
420 4, /* constant shift costs */
421 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
422 0, /* cost of multiply per each bit set */
423 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
424 1, /* cost of movsx */
425 1, /* cost of movzx */
426 16, /* "large" insn */
428 2, /* cost for loading QImode using movzbl */
429 {4, 5, 4}, /* cost of loading integer registers
430 in QImode, HImode and SImode.
431 Relative to reg-reg move (2). */
432 {2, 3, 2}, /* cost of storing integer registers */
433 2, /* cost of reg,reg fld/fst */
434 {2, 2, 6}, /* cost of loading fp registers
435 in SFmode, DFmode and XFmode */
436 {4, 4, 6}, /* cost of loading integer registers */
437 2, /* cost of moving MMX register */
438 {2, 2}, /* cost of loading MMX registers
439 in SImode and DImode */
440 {2, 2}, /* cost of storing MMX registers
441 in SImode and DImode */
442 12, /* cost of moving SSE register */
443 {12, 12, 12}, /* cost of loading SSE registers
444 in SImode, DImode and TImode */
445 {2, 2, 8}, /* cost of storing SSE registers
446 in SImode, DImode and TImode */
447 10, /* MMX or SSE register to integer */
448 64, /* size of prefetch block */
449 6, /* number of parallel prefetches */
451 5, /* cost of FADD and FSUB insns. */
452 7, /* cost of FMUL instruction. */
453 43, /* cost of FDIV instruction. */
454 2, /* cost of FABS instruction. */
455 2, /* cost of FCHS instruction. */
456 43, /* cost of FSQRT instruction. */
459 const struct processor_costs *ix86_cost = &pentium_cost;
461 /* Processor feature/optimization bitmasks. */
462 #define m_386 (1<<PROCESSOR_I386)
463 #define m_486 (1<<PROCESSOR_I486)
464 #define m_PENT (1<<PROCESSOR_PENTIUM)
465 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
466 #define m_K6 (1<<PROCESSOR_K6)
467 #define m_ATHLON (1<<PROCESSOR_ATHLON)
468 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
469 #define m_K8 (1<<PROCESSOR_K8)
470 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
472 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
473 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
474 const int x86_zero_extend_with_and = m_486 | m_PENT;
475 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
476 const int x86_double_with_add = ~m_386;
477 const int x86_use_bit_test = m_386;
478 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
479 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
480 const int x86_3dnow_a = m_ATHLON_K8;
481 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
482 const int x86_branch_hints = m_PENT4;
483 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
484 const int x86_partial_reg_stall = m_PPRO;
485 const int x86_use_loop = m_K6;
486 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
487 const int x86_use_mov0 = m_K6;
488 const int x86_use_cltd = ~(m_PENT | m_K6);
489 const int x86_read_modify_write = ~m_PENT;
490 const int x86_read_modify = ~(m_PENT | m_PPRO);
491 const int x86_split_long_moves = m_PPRO;
492 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
493 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
494 const int x86_single_stringop = m_386 | m_PENT4;
495 const int x86_qimode_math = ~(0);
496 const int x86_promote_qi_regs = 0;
497 const int x86_himode_math = ~(m_PPRO);
498 const int x86_promote_hi_regs = m_PPRO;
499 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
500 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
501 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
502 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
503 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
504 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
505 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
506 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
507 const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
508 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
509 const int x86_decompose_lea = m_PENT4;
510 const int x86_shift1 = ~m_486;
511 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
512 const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
513 /* Set for machines where the type and dependencies are resolved on SSE register
514 parts instead of whole registers, so we may maintain just lower part of
515 scalar values in proper format leaving the upper part undefined. */
516 const int x86_sse_partial_regs = m_ATHLON_K8;
517 /* Athlon optimizes partial-register FPS special case, thus avoiding the
518 need for extra instructions beforehand */
519 const int x86_sse_partial_regs_for_cvtsd2ss = 0;
520 const int x86_sse_typeless_stores = m_ATHLON_K8;
521 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
522 const int x86_use_ffreep = m_ATHLON_K8;
523 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
524 const int x86_inter_unit_moves = ~(m_ATHLON_K8);
526 /* In case the average insn count for single function invocation is
527 lower than this constant, emit fast (but longer) prologue and
529 #define FAST_PROLOGUE_INSN_COUNT 20
531 /* Set by prologue expander and used by epilogue expander to determine
533 static int use_fast_prologue_epilogue;
535 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
536 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
537 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
538 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
540 /* Array of the smallest class containing reg number REGNO, indexed by
541 REGNO. Used by REGNO_REG_CLASS in i386.h. */
543 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
546 AREG, DREG, CREG, BREG,
548 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
550 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
551 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
554 /* flags, fpsr, dirflag, frame */
555 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
556 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
558 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
560 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
561 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
562 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
566 /* The "default" register map used in 32bit mode. */
568 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
570 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
571 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
572 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
573 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
574 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
575 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
576 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
579 static int const x86_64_int_parameter_registers[6] =
581 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
582 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
585 static int const x86_64_int_return_registers[4] =
587 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
590 /* The "default" register map used in 64bit mode. */
591 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
593 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
594 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
595 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
596 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
597 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
598 8,9,10,11,12,13,14,15, /* extended integer registers */
599 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
602 /* Define the register numbers to be used in Dwarf debugging information.
603 The SVR4 reference port C compiler uses the following register numbers
604 in its Dwarf output code:
605 0 for %eax (gcc regno = 0)
606 1 for %ecx (gcc regno = 2)
607 2 for %edx (gcc regno = 1)
608 3 for %ebx (gcc regno = 3)
609 4 for %esp (gcc regno = 7)
610 5 for %ebp (gcc regno = 6)
611 6 for %esi (gcc regno = 4)
612 7 for %edi (gcc regno = 5)
613 The following three DWARF register numbers are never generated by
614 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
615 believes these numbers have these meanings.
616 8 for %eip (no gcc equivalent)
617 9 for %eflags (gcc regno = 17)
618 10 for %trapno (no gcc equivalent)
619 It is not at all clear how we should number the FP stack registers
620 for the x86 architecture. If the version of SDB on x86/svr4 were
621 a bit less brain dead with respect to floating-point then we would
622 have a precedent to follow with respect to DWARF register numbers
623 for x86 FP registers, but the SDB on x86/svr4 is so completely
624 broken with respect to FP registers that it is hardly worth thinking
625 of it as something to strive for compatibility with.
626 The version of x86/svr4 SDB I have at the moment does (partially)
627 seem to believe that DWARF register number 11 is associated with
628 the x86 register %st(0), but that's about all. Higher DWARF
629 register numbers don't seem to be associated with anything in
630 particular, and even for DWARF regno 11, SDB only seems to under-
631 stand that it should say that a variable lives in %st(0) (when
632 asked via an `=' command) if we said it was in DWARF regno 11,
633 but SDB still prints garbage when asked for the value of the
634 variable in question (via a `/' command).
635 (Also note that the labels SDB prints for various FP stack regs
636 when doing an `x' command are all wrong.)
637 Note that these problems generally don't affect the native SVR4
638 C compiler because it doesn't allow the use of -O with -g and
639 because when it is *not* optimizing, it allocates a memory
640 location for each floating-point variable, and the memory
641 location is what gets described in the DWARF AT_location
642 attribute for the variable in question.
643 Regardless of the severe mental illness of the x86/svr4 SDB, we
644 do something sensible here and we use the following DWARF
645 register numbers. Note that these are all stack-top-relative
647 11 for %st(0) (gcc regno = 8)
648 12 for %st(1) (gcc regno = 9)
649 13 for %st(2) (gcc regno = 10)
650 14 for %st(3) (gcc regno = 11)
651 15 for %st(4) (gcc regno = 12)
652 16 for %st(5) (gcc regno = 13)
653 17 for %st(6) (gcc regno = 14)
654 18 for %st(7) (gcc regno = 15)
656 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
658 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
659 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
660 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
661 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
662 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
663 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
664 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
667 /* Test and compare insns in i386.md store the information needed to
668 generate branch and scc insns here. */
670 rtx ix86_compare_op0 = NULL_RTX;
671 rtx ix86_compare_op1 = NULL_RTX;
673 /* The encoding characters for the four TLS models present in ELF. */
675 static char const tls_model_chars[] = " GLil";
677 #define MAX_386_STACK_LOCALS 3
678 /* Size of the register save area. */
679 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
681 /* Define the structure for the machine field in struct function. */
682 struct machine_function GTY(())
684 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
685 const char *some_ld_name;
686 int save_varrargs_registers;
687 int accesses_prev_frame;
690 #define ix86_stack_locals (cfun->machine->stack_locals)
691 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
693 /* Structure describing stack frame layout.
694 Stack grows downward:
700 saved frame pointer if frame_pointer_needed
701 <- HARD_FRAME_POINTER
707 > to_allocate <- FRAME_POINTER
719 int outgoing_arguments_size;
722 HOST_WIDE_INT to_allocate;
723 /* The offsets relative to ARG_POINTER. */
724 HOST_WIDE_INT frame_pointer_offset;
725 HOST_WIDE_INT hard_frame_pointer_offset;
726 HOST_WIDE_INT stack_pointer_offset;
729 /* Used to enable/disable debugging features. */
730 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
731 /* Code model option as passed by user. */
732 const char *ix86_cmodel_string;
734 enum cmodel ix86_cmodel;
736 const char *ix86_asm_string;
737 enum asm_dialect ix86_asm_dialect = ASM_ATT;
739 const char *ix86_tls_dialect_string;
740 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
742 /* Which unit we are generating floating point math for. */
743 enum fpmath_unit ix86_fpmath;
745 /* Which cpu are we scheduling for. */
746 enum processor_type ix86_cpu;
747 /* Which instruction set architecture to use. */
748 enum processor_type ix86_arch;
750 /* Strings to hold which cpu and instruction set architecture to use. */
751 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
752 const char *ix86_arch_string; /* for -march=<xxx> */
753 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
755 /* # of registers to use to pass arguments. */
756 const char *ix86_regparm_string;
758 /* true if sse prefetch instruction is not NOOP. */
759 int x86_prefetch_sse;
761 /* ix86_regparm_string as a number */
764 /* Alignment to use for loops and jumps: */
766 /* Power of two alignment for loops. */
767 const char *ix86_align_loops_string;
769 /* Power of two alignment for non-loop jumps. */
770 const char *ix86_align_jumps_string;
772 /* Power of two alignment for stack boundary in bytes. */
773 const char *ix86_preferred_stack_boundary_string;
775 /* Preferred alignment for stack boundary in bits. */
776 int ix86_preferred_stack_boundary;
778 /* Values 1-5: see jump.c */
779 int ix86_branch_cost;
780 const char *ix86_branch_cost_string;
782 /* Power of two alignment for functions. */
783 const char *ix86_align_funcs_string;
785 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
786 static char internal_label_prefix[16];
787 static int internal_label_prefix_len;
789 static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
790 static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
791 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
792 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
794 static const char *get_some_local_dynamic_name PARAMS ((void));
795 static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
796 static rtx maybe_get_pool_constant PARAMS ((rtx));
797 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
798 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
800 static rtx get_thread_pointer PARAMS ((void));
801 static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
802 static rtx gen_push PARAMS ((rtx));
803 static int memory_address_length PARAMS ((rtx addr));
804 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
805 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
806 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
807 static void ix86_dump_ppro_packet PARAMS ((FILE *));
808 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
809 static struct machine_function * ix86_init_machine_status PARAMS ((void));
810 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
811 static int ix86_nsaved_regs PARAMS ((void));
812 static void ix86_emit_save_regs PARAMS ((void));
813 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
814 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
815 static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
/* Forward declarations of static helpers defined later in this file.
   The PARAMS macro wraps prototype argument lists for pre-ISO (K&R)
   compilers.  NOTE(review): this extract is missing scattered source
   lines (the embedded original line numbers jump), so several
   declarations and the struct/enum definitions below appear truncated.  */
816 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
817 static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
818 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
819 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
820 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
821 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
822 static int ix86_issue_rate PARAMS ((void));
823 static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
824 static void ix86_sched_init PARAMS ((FILE *, int, int));
825 static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
826 static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
827 static int ia32_use_dfa_pipeline_interface PARAMS ((void));
828 static int ia32_multipass_dfa_lookahead PARAMS ((void));
829 static void ix86_init_mmx_sse_builtins PARAMS ((void));
830 static rtx x86_this_parameter PARAMS ((tree));
831 static void x86_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
832 HOST_WIDE_INT, tree));
833 static bool x86_can_output_mi_thunk PARAMS ((tree, HOST_WIDE_INT,
834 HOST_WIDE_INT, tree));
835 bool ix86_expand_carry_flag_compare PARAMS ((enum rtx_code, rtx, rtx, rtx*));
/* NOTE(review): the line below looks like the field list of
   struct ix86_address (used by ix86_decompose_address just after);
   the enclosing struct declaration lines are missing from this extract.  */
839 rtx base, index, disp;
843 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
844 static int ix86_address_cost PARAMS ((rtx));
845 static bool ix86_cannot_force_const_mem PARAMS ((rtx));
846 static rtx ix86_delegitimize_address PARAMS ((rtx));
848 static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
849 static const char *ix86_strip_name_encoding PARAMS ((const char *))
/* Builtin-expansion helpers for the MMX/SSE intrinsics.  */
852 struct builtin_description;
853 static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
855 static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
857 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
858 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
859 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
860 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
861 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
862 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
863 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
867 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
869 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
870 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
871 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
872 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
873 static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
874 static int ix86_save_reg PARAMS ((unsigned int, int));
875 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
876 static int ix86_comp_type_attributes PARAMS ((tree, tree));
877 static int ix86_fntype_regparm PARAMS ((tree));
878 const struct attribute_spec ix86_attribute_table[];
879 static bool ix86_function_ok_for_sibcall PARAMS ((tree, tree));
880 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
881 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
882 static int ix86_value_regno PARAMS ((enum machine_mode));
883 static bool ix86_ms_bitfield_layout_p PARAMS ((tree));
884 static tree ix86_handle_struct_attribute PARAMS ((tree *, tree, tree, int, bool *));
885 static int extended_reg_mentioned_1 PARAMS ((rtx *, void *));
886 static bool ix86_rtx_costs PARAMS ((rtx, int, int, int *));
888 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
889 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
892 /* Register class used for passing given 64bit part of the argument.
893 These represent classes as documented by the PS ABI, with the exception
894 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
895 use SF or DFmode move instead of DImode to avoid reformatting penalties.
897 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
898 whenever possible (upper half does contain padding).
/* NOTE(review): enum body truncated in this extract; only two of the
   enumerators named by x86_64_reg_class_name below are visible.  */
900 enum x86_64_reg_class
903 X86_64_INTEGER_CLASS,
904 X86_64_INTEGERSI_CLASS,
/* Human-readable names indexed by enum x86_64_reg_class, used for
   debugging output.  */
913 static const char * const x86_64_reg_class_name[] =
914 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
916 #define MAX_CLASSES 4
917 static int classify_argument PARAMS ((enum machine_mode, tree,
918 enum x86_64_reg_class [MAX_CLASSES],
920 static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
922 static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
924 static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
925 enum x86_64_reg_class));
927 /* Initialize the GCC target structure. */
/* Each #undef/#define pair overrides one hook in the default target
   vector; the vector itself is instantiated as `targetm' at the end
   of this block via TARGET_INITIALIZER.  */
928 #undef TARGET_ATTRIBUTE_TABLE
929 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
930 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
931 # undef TARGET_MERGE_DECL_ATTRIBUTES
932 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
935 #undef TARGET_COMP_TYPE_ATTRIBUTES
936 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
938 #undef TARGET_INIT_BUILTINS
939 #define TARGET_INIT_BUILTINS ix86_init_builtins
941 #undef TARGET_EXPAND_BUILTIN
942 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
944 #undef TARGET_ASM_FUNCTION_EPILOGUE
945 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
/* x86 assemblers want no parentheses around expressions.  */
947 #undef TARGET_ASM_OPEN_PAREN
948 #define TARGET_ASM_OPEN_PAREN ""
949 #undef TARGET_ASM_CLOSE_PAREN
950 #define TARGET_ASM_CLOSE_PAREN ""
952 #undef TARGET_ASM_ALIGNED_HI_OP
953 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
954 #undef TARGET_ASM_ALIGNED_SI_OP
955 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
957 #undef TARGET_ASM_ALIGNED_DI_OP
958 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
/* x86 has no alignment restrictions, so unaligned emission reuses the
   aligned directives.  */
961 #undef TARGET_ASM_UNALIGNED_HI_OP
962 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
963 #undef TARGET_ASM_UNALIGNED_SI_OP
964 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
965 #undef TARGET_ASM_UNALIGNED_DI_OP
966 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
/* Instruction scheduler hooks.  */
968 #undef TARGET_SCHED_ADJUST_COST
969 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
970 #undef TARGET_SCHED_ISSUE_RATE
971 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
972 #undef TARGET_SCHED_VARIABLE_ISSUE
973 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
974 #undef TARGET_SCHED_INIT
975 #define TARGET_SCHED_INIT ix86_sched_init
976 #undef TARGET_SCHED_REORDER
977 #define TARGET_SCHED_REORDER ix86_sched_reorder
978 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
979 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
980 ia32_use_dfa_pipeline_interface
981 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
982 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
983 ia32_multipass_dfa_lookahead
985 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
986 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
989 #undef TARGET_HAVE_TLS
990 #define TARGET_HAVE_TLS true
992 #undef TARGET_CANNOT_FORCE_CONST_MEM
993 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
995 #undef TARGET_DELEGITIMIZE_ADDRESS
996 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
998 #undef TARGET_MS_BITFIELD_LAYOUT_P
999 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1001 #undef TARGET_ASM_OUTPUT_MI_THUNK
1002 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1003 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1004 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1006 #undef TARGET_RTX_COSTS
1007 #define TARGET_RTX_COSTS ix86_rtx_costs
1008 #undef TARGET_ADDRESS_COST
1009 #define TARGET_ADDRESS_COST ix86_address_cost
/* The one and only target hook vector for this backend.  */
1011 struct gcc_target targetm = TARGET_INITIALIZER;
1013 /* Sometimes certain combinations of command options do not make
1014 sense on a particular target machine. You can define a macro
1015 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1016 defined, is executed once just after all the command options have
1019 Don't use this macro to turn on various extra optimizations for
1020 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1026 /* Comes from final.c -- no real reason to change it. */
1027 #define MAX_CODE_ALIGN 16
/* Per-processor tuning record: costs, forced target flags, and default
   code alignments.  NOTE(review): the enclosing struct declaration line
   is missing from this extract; these are its fields.  */
1031 const struct processor_costs *cost; /* Processor costs */
1032 const int target_enable; /* Target flags to enable. */
1033 const int target_disable; /* Target flags to disable. */
1034 const int align_loop; /* Default alignments. */
1035 const int align_loop_max_skip;
1036 const int align_jump;
1037 const int align_jump_max_skip;
1038 const int align_func;
/* Indexed by enum processor_type; each row matches the fields above.  */
1040 const processor_target_table[PROCESSOR_max] =
1042 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1043 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1044 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1045 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1046 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1047 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1048 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1049 {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
1052 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
/* Alias-table entry: maps a -march=/-mcpu= name to a processor and a
   set of PTA_* ISA-extension flags.  */
1055 const char *const name; /* processor name or nickname. */
1056 const enum processor_type processor;
1057 const enum pta_flags
1062 PTA_PREFETCH_SSE = 8,
1068 const processor_alias_table[] =
1070 {"i386", PROCESSOR_I386, 0},
1071 {"i486", PROCESSOR_I486, 0},
1072 {"i586", PROCESSOR_PENTIUM, 0},
1073 {"pentium", PROCESSOR_PENTIUM, 0},
1074 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1075 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1076 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1077 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1078 {"i686", PROCESSOR_PENTIUMPRO, 0},
1079 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1080 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1081 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1082 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
1083 PTA_MMX | PTA_PREFETCH_SSE},
1084 {"k6", PROCESSOR_K6, PTA_MMX},
1085 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1086 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1087 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1089 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1090 | PTA_3DNOW | PTA_3DNOW_A},
1091 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1092 | PTA_3DNOW_A | PTA_SSE},
1093 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1094 | PTA_3DNOW_A | PTA_SSE},
1095 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1096 | PTA_3DNOW_A | PTA_SSE},
1097 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1098 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1101 int const pta_size = ARRAY_SIZE (processor_alias_table);
/* NOTE(review): the statements below appear to be the body of the
   OVERRIDE_OPTIONS function described above; its signature and opening
   lines are missing from this extract — confirm against the full file.  */
1103 /* By default our XFmode is the 80-bit extended format. If we
1104 use TFmode instead, it's also the 80-bit format, but with padding. */
1105 real_format_for_mode[XFmode - QFmode] = &ieee_extended_intel_96_format;
1106 real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
1108 /* Set the default values for switches whose default depends on TARGET_64BIT
1109 in case they weren't overwritten by command line options. */
/* A value of 2 marks "not set on the command line"; see
   optimization_options below, which plants that sentinel.  */
1112 if (flag_omit_frame_pointer == 2)
1113 flag_omit_frame_pointer = 1;
1114 if (flag_asynchronous_unwind_tables == 2)
1115 flag_asynchronous_unwind_tables = 1;
1116 if (flag_pcc_struct_return == 2)
1117 flag_pcc_struct_return = 0;
1121 if (flag_omit_frame_pointer == 2)
1122 flag_omit_frame_pointer = 0;
1123 if (flag_asynchronous_unwind_tables == 2)
1124 flag_asynchronous_unwind_tables = 0;
1125 if (flag_pcc_struct_return == 2)
1126 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1129 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1130 SUBTARGET_OVERRIDE_OPTIONS;
/* -mcpu defaults to -march if given, otherwise to the configured
   default; -march itself defaults by word size.  */
1133 if (!ix86_cpu_string && ix86_arch_string)
1134 ix86_cpu_string = ix86_arch_string;
1135 if (!ix86_cpu_string)
1136 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
1137 if (!ix86_arch_string)
1138 ix86_arch_string = TARGET_64BIT ? "k8" : "i386";
/* Parse -mcmodel=.  kernel/medium/large are rejected under -fPIC.  */
1140 if (ix86_cmodel_string != 0)
1142 if (!strcmp (ix86_cmodel_string, "small"))
1143 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1145 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1146 else if (!strcmp (ix86_cmodel_string, "32"))
1147 ix86_cmodel = CM_32;
1148 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1149 ix86_cmodel = CM_KERNEL;
1150 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1151 ix86_cmodel = CM_MEDIUM;
1152 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1153 ix86_cmodel = CM_LARGE;
1155 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1159 ix86_cmodel = CM_32;
1161 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
/* Parse -masm= (output dialect only; does not affect input).  */
1163 if (ix86_asm_string != 0)
1165 if (!strcmp (ix86_asm_string, "intel"))
1166 ix86_asm_dialect = ASM_INTEL;
1167 else if (!strcmp (ix86_asm_string, "att"))
1168 ix86_asm_dialect = ASM_ATT;
1170 error ("bad value (%s) for -masm= switch", ix86_asm_string);
/* Sanity-check the code model against the selected word size.  */
1172 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1173 error ("code model `%s' not supported in the %s bit mode",
1174 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1175 if (ix86_cmodel == CM_LARGE)
1176 sorry ("code model `large' not supported yet");
1177 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1178 sorry ("%i-bit mode not compiled in",
1179 (target_flags & MASK_64BIT) ? 64 : 32);
/* Resolve -march=: pick the architecture and enable any ISA-extension
   flags the alias table lists for it, unless the user set them
   explicitly on the command line (target_flags_explicit).  */
1181 for (i = 0; i < pta_size; i++)
1182 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1184 ix86_arch = processor_alias_table[i].processor;
1185 /* Default cpu tuning to the architecture. */
1186 ix86_cpu = ix86_arch;
1187 if (processor_alias_table[i].flags & PTA_MMX
1188 && !(target_flags_explicit & MASK_MMX))
1189 target_flags |= MASK_MMX;
1190 if (processor_alias_table[i].flags & PTA_3DNOW
1191 && !(target_flags_explicit & MASK_3DNOW))
1192 target_flags |= MASK_3DNOW;
1193 if (processor_alias_table[i].flags & PTA_3DNOW_A
1194 && !(target_flags_explicit & MASK_3DNOW_A))
1195 target_flags |= MASK_3DNOW_A;
1196 if (processor_alias_table[i].flags & PTA_SSE
1197 && !(target_flags_explicit & MASK_SSE))
1198 target_flags |= MASK_SSE;
1199 if (processor_alias_table[i].flags & PTA_SSE2
1200 && !(target_flags_explicit & MASK_SSE2))
1201 target_flags |= MASK_SSE2;
1202 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1203 x86_prefetch_sse = true;
1204 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1205 error ("CPU you selected does not support x86-64 instruction set")
1210 error ("bad value (%s) for -march= switch", ix86_arch_string);
/* Resolve -mcpu= (tuning only; does not enable ISA extensions
   beyond prefetch).  */
1212 for (i = 0; i < pta_size; i++)
1213 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
1215 ix86_cpu = processor_alias_table[i].processor;
1216 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1217 error ("CPU you selected does not support x86-64 instruction set");
1220 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1221 x86_prefetch_sse = true;
1223 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
/* Pick cost tables: size_cost when optimizing for size, otherwise the
   per-processor table, whose forced flags are applied as well.  */
1226 ix86_cost = &size_cost;
1228 ix86_cost = processor_target_table[ix86_cpu].cost;
1229 target_flags |= processor_target_table[ix86_cpu].target_enable;
1230 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
1232 /* Arrange to set up i386_stack_locals for all functions. */
1233 init_machine_status = ix86_init_machine_status;
1235 /* Validate -mregparm= value. */
1236 if (ix86_regparm_string)
1238 i = atoi (ix86_regparm_string);
1239 if (i < 0 || i > REGPARM_MAX)
1240 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1246 ix86_regparm = REGPARM_MAX;
1248 /* If the user has provided any of the -malign-* options,
1249 warn and use that value only if -falign-* is not set.
1250 Remove this code in GCC 3.2 or later. */
1251 if (ix86_align_loops_string)
1253 warning ("-malign-loops is obsolete, use -falign-loops");
1254 if (align_loops == 0)
1256 i = atoi (ix86_align_loops_string);
1257 if (i < 0 || i > MAX_CODE_ALIGN)
1258 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1260 align_loops = 1 << i;
1264 if (ix86_align_jumps_string)
1266 warning ("-malign-jumps is obsolete, use -falign-jumps");
1267 if (align_jumps == 0)
1269 i = atoi (ix86_align_jumps_string);
1270 if (i < 0 || i > MAX_CODE_ALIGN)
/* BUG(review): this is the -malign-jumps path, but the message below
   says "-malign-loops"; it should read "-malign-jumps=%d".  */
1271 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1273 align_jumps = 1 << i;
1277 if (ix86_align_funcs_string)
1279 warning ("-malign-functions is obsolete, use -falign-functions");
1280 if (align_functions == 0)
1282 i = atoi (ix86_align_funcs_string);
1283 if (i < 0 || i > MAX_CODE_ALIGN)
/* BUG(review): this is the -malign-functions path, but the message below
   says "-malign-loops"; it should read "-malign-functions=%d".  */
1284 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1286 align_functions = 1 << i;
1290 /* Default align_* from the processor table. */
1291 if (align_loops == 0)
1293 align_loops = processor_target_table[ix86_cpu].align_loop;
1294 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
1296 if (align_jumps == 0)
1298 align_jumps = processor_target_table[ix86_cpu].align_jump;
1299 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
1301 if (align_functions == 0)
1303 align_functions = processor_target_table[ix86_cpu].align_func;
1306 /* Validate -mpreferred-stack-boundary= value, or provide default.
1307 The default of 128 bits is for Pentium III's SSE __m128, but we
1308 don't want additional code to keep the stack aligned when
1309 optimizing for code size. */
1310 ix86_preferred_stack_boundary = (optimize_size
1311 ? TARGET_64BIT ? 128 : 32
/* The option value is an exponent: boundary = 2^i bytes.  */
1313 if (ix86_preferred_stack_boundary_string)
1315 i = atoi (ix86_preferred_stack_boundary_string);
1316 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1317 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1318 TARGET_64BIT ? 4 : 2);
1320 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1323 /* Validate -mbranch-cost= value, or provide default. */
1324 ix86_branch_cost = processor_target_table[ix86_cpu].cost->branch_cost;
1325 if (ix86_branch_cost_string)
1327 i = atoi (ix86_branch_cost_string);
1329 error ("-mbranch-cost=%d is not between 0 and 5", i);
1331 ix86_branch_cost = i;
/* Parse -mtls-dialect= (gnu or sun TLS code sequences).  */
1334 if (ix86_tls_dialect_string)
1336 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1337 ix86_tls_dialect = TLS_DIALECT_GNU;
1338 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1339 ix86_tls_dialect = TLS_DIALECT_SUN;
1341 error ("bad value (%s) for -mtls-dialect= switch",
1342 ix86_tls_dialect_string);
1345 /* Keep nonleaf frame pointers. */
1346 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1347 flag_omit_frame_pointer = 1;
1349 /* If we're doing fast math, we don't care about comparison order
1350 wrt NaNs. This lets us use a shorter comparison sequence. */
1351 if (flag_unsafe_math_optimizations)
1352 target_flags &= ~MASK_IEEE_FP;
1354 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1355 since the insns won't need emulation. */
1356 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1357 target_flags &= ~MASK_NO_FANCY_MATH_387;
/* 64-bit-only checks and defaults (the guarding conditional is not
   visible in this extract).  */
1361 if (TARGET_ALIGN_DOUBLE)
1362 error ("-malign-double makes no sense in the 64bit mode");
1364 error ("-mrtd calling convention not supported in the 64bit mode");
1365 /* Enable by default the SSE and MMX builtins. */
1366 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1367 ix86_fpmath = FPMATH_SSE;
1370 ix86_fpmath = FPMATH_387;
/* Parse -mfpmath=: 387, sse, or both; fall back to 387 (or SSE) with a
   warning when the requested unit is disabled.  */
1372 if (ix86_fpmath_string != 0)
1374 if (! strcmp (ix86_fpmath_string, "387"))
1375 ix86_fpmath = FPMATH_387;
1376 else if (! strcmp (ix86_fpmath_string, "sse"))
1380 warning ("SSE instruction set disabled, using 387 arithmetics");
1381 ix86_fpmath = FPMATH_387;
1384 ix86_fpmath = FPMATH_SSE;
1386 else if (! strcmp (ix86_fpmath_string, "387,sse")
1387 || ! strcmp (ix86_fpmath_string, "sse,387"))
1391 warning ("SSE instruction set disabled, using 387 arithmetics");
1392 ix86_fpmath = FPMATH_387;
1394 else if (!TARGET_80387)
1396 warning ("387 instruction set disabled, using SSE arithmetics");
1397 ix86_fpmath = FPMATH_SSE;
1400 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1403 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1406 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1410 target_flags |= MASK_MMX;
1411 x86_prefetch_sse = true;
1414 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1417 target_flags |= MASK_MMX;
1418 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1419 extensions it adds. */
1420 if (x86_3dnow_a & (1 << ix86_arch))
1421 target_flags |= MASK_3DNOW_A;
1423 if ((x86_accumulate_outgoing_args & CPUMASK)
1424 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1426 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1428 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1431 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1432 p = strchr (internal_label_prefix, 'X');
1433 internal_label_prefix_len = p - internal_label_prefix;
/* Per-optimization-level defaults, run before override_options.
   LEVEL is the -O level; SIZE is nonzero for -Os.  */
1439 optimization_options (level, size)
1441 int size ATTRIBUTE_UNUSED;
1443 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1444 make the problem with not enough registers even worse. */
1445 #ifdef INSN_SCHEDULING
1447 flag_schedule_insns = 0;
1450 /* The default values of these switches depend on the TARGET_64BIT
1451 that is not known at this moment. Mark these values with 2 and
1452 let the user override them. In case there is no command line option
1453 specifying them, we will set the defaults in override_options. */
1455 flag_omit_frame_pointer = 2;
1456 flag_pcc_struct_return = 2;
1457 flag_asynchronous_unwind_tables = 2;
1460 /* Table of valid machine attributes. */
1461 const struct attribute_spec ix86_attribute_table[] =
1463 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1464 /* Stdcall attribute says callee is responsible for popping arguments
1465 if they are not variable. */
1466 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1467 /* Fastcall attribute says callee is responsible for popping arguments
1468 if they are not variable. */
1469 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1470 /* Cdecl attribute says the callee is a normal C declaration */
1471 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1472 /* Regparm attribute specifies how many integer arguments are to be
1473 passed in registers. */
1474 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1475 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1476 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1477 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1478 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1480 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1481 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
/* Sentinel entry terminating the table.  */
1482 { NULL, 0, 0, false, false, false, NULL }
1485 /* Decide whether we can make a sibling call to a function. DECL is the
1486 declaration of the function being targeted by the call and EXP is the
1487 CALL_EXPR representing the call. */
1490 ix86_function_ok_for_sibcall (decl, exp)
1494 /* If we are generating position-independent code, we cannot sibcall
1495 optimize any indirect call, or a direct call to a global function,
1496 as the PLT requires %ebx be live. */
1497 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1500 /* If we are returning floats on the 80387 register stack, we cannot
1501 make a sibcall from a function that doesn't return a float to a
1502 function that does or, conversely, from a function that does return
1503 a float to a function that doesn't; the necessary stack adjustment
1504 would not be executed. */
1505 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1506 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1509 /* If this call is indirect, we'll need to be able to use a call-clobbered
1510 register for the address of the target function. Make sure that all
1511 such registers are not used for passing parameters. */
1512 if (!decl && !TARGET_64BIT)
1514 int regparm = ix86_regparm;
1517 /* We're looking at the CALL_EXPR, we need the type of the function. */
1518 type = TREE_OPERAND (exp, 0); /* pointer expression */
1519 type = TREE_TYPE (type); /* pointer type */
1520 type = TREE_TYPE (type); /* function type */
/* A regparm attribute on the callee type overrides the global value.  */
1522 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type))
1524 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1528 /* ??? Need to count the actual number of registers to be used,
1529 not the possible number of registers. Fix later. */
1534 /* Otherwise okay. That also includes certain types of indirect calls. */
1538 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1539 arguments as in struct attribute_spec.handler. */
1541 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1544 tree args ATTRIBUTE_UNUSED;
1545 int flags ATTRIBUTE_UNUSED;
/* These attributes only make sense on (pointers to) function types;
   reject anything else with a warning and drop the attribute.  */
1548 if (TREE_CODE (*node) != FUNCTION_TYPE
1549 && TREE_CODE (*node) != METHOD_TYPE
1550 && TREE_CODE (*node) != FIELD_DECL
1551 && TREE_CODE (*node) != TYPE_DECL)
1553 warning ("`%s' attribute only applies to functions",
1554 IDENTIFIER_POINTER (name));
1555 *no_add_attrs = true;
/* fastcall conflicts with both stdcall and regparm, since all three
   dictate how arguments reach registers/stack.  */
1559 if (is_attribute_p ("fastcall", name))
1561 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1563 error ("fastcall and stdcall attributes are not compatible");
1565 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1567 error ("fastcall and regparm attributes are not compatible");
1570 else if (is_attribute_p ("stdcall", name))
1572 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1574 error ("fastcall and stdcall attributes are not compatible");
1581 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1582 *no_add_attrs = true;
1588 /* Handle a "regparm" attribute;
1589 arguments as in struct attribute_spec.handler. */
1591 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1595 int flags ATTRIBUTE_UNUSED;
1598 if (TREE_CODE (*node) != FUNCTION_TYPE
1599 && TREE_CODE (*node) != METHOD_TYPE
1600 && TREE_CODE (*node) != FIELD_DECL
1601 && TREE_CODE (*node) != TYPE_DECL)
1603 warning ("`%s' attribute only applies to functions",
1604 IDENTIFIER_POINTER (name));
1605 *no_add_attrs = true;
/* Validate the single argument: an integer constant no larger than
   REGPARM_MAX.  */
1611 cst = TREE_VALUE (args);
1612 if (TREE_CODE (cst) != INTEGER_CST)
1614 warning ("`%s' attribute requires an integer constant argument",
1615 IDENTIFIER_POINTER (name));
1616 *no_add_attrs = true;
1618 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1620 warning ("argument to `%s' attribute larger than %d",
1621 IDENTIFIER_POINTER (name), REGPARM_MAX);
1622 *no_add_attrs = true;
/* regparm cannot be combined with fastcall (see cdecl handler).  */
1625 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1627 error ("fastcall and regparm attributes are not compatible");
1634 /* Return 0 if the attributes for two types are incompatible, 1 if they
1635 are compatible, and 2 if they are nearly compatible (which causes a
1636 warning to be generated). */
1639 ix86_comp_type_attributes (type1, type2)
1643 /* Check for mismatch of non-default calling convention. */
/* Under -mrtd the default flips, so the "non-default" attribute to
   compare is cdecl rather than stdcall.  */
1644 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1646 if (TREE_CODE (type1) != FUNCTION_TYPE)
1649 /* Check for mismatched fastcall types */
1650 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1651 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1654 /* Check for mismatched return types (cdecl vs stdcall). */
1655 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1656 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1661 /* Return the regparm value for a function with the indicated TYPE:
   the regparm attribute value if present, else the global -mregparm
   setting. */
1664 ix86_fntype_regparm (type)
1669 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1671 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1673 return ix86_regparm;
1676 /* Value is the number of bytes of arguments automatically
1677 popped when returning from a subroutine call.
1678 FUNDECL is the declaration node of the function (as a tree),
1679 FUNTYPE is the data type of the function (as a tree),
1680 or for a library call it is an identifier node for the subroutine name.
1681 SIZE is the number of bytes of arguments passed on the stack.
1683 On the 80386, the RTD insn may be used to pop them if the number
1684 of args is fixed, but if the number is variable then the caller
1685 must pop them all. RTD can't be used for library calls now
1686 because the library is compiled with the Unix compiler.
1687 Use of RTD is a selectable option, since it is incompatible with
1688 standard Unix calling sequences. If the option is not selected,
1689 the caller must always pop the args.
1691 The attribute stdcall is equivalent to RTD on a per module basis. */
1694 ix86_return_pops_args (fundecl, funtype, size)
/* -mrtd applies only to real declarations, not library-call
   identifier nodes.  */
1699 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1701 /* Cdecl functions override -mrtd, and never pop the stack. */
1702 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1704 /* Stdcall and fastcall functions will pop the stack if not variable args. */
1705 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1706 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
/* Popping is allowed only when the arg list is fixed (ends in void).  */
1710 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1711 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1712 == void_type_node)))
1716 /* Lose any fake structure return argument if it is passed on the stack. */
1717 if (aggregate_value_p (TREE_TYPE (funtype))
1720 int nregs = ix86_fntype_regparm (funtype);
/* The hidden struct-return pointer occupies one word.  */
1723 return GET_MODE_SIZE (Pmode);
1729 /* Argument support functions. */
1731 /* Return true when register may be used to pass function parameters. */
1733 ix86_function_arg_regno_p (regno)
/* 32-bit case: the first REGPARM_MAX integer regs, plus SSE regs when
   SSE argument passing is enabled.  */
1738 return (regno < REGPARM_MAX
1739 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
/* 64-bit case (branch structure partly missing in this extract).  */
1740 if (SSE_REGNO_P (regno) && TARGET_SSE)
1742 /* RAX is used as hidden argument to va_arg functions. */
1745 for (i = 0; i < REGPARM_MAX; i++)
1746 if (regno == x86_64_int_parameter_registers[i])
1751 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1752 for a call to a function whose data type is FNTYPE.
1753 For a library call, FNTYPE is 0. */
1756 init_cumulative_args (cum, fntype, libname)
1757 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1758 tree fntype; /* tree ptr for function decl */
1759 rtx libname; /* SYMBOL_REF of library name or 0 */
1761 static CUMULATIVE_ARGS zero_cum;
1762 tree param, next_param;
/* Optional tracing, enabled with -mdebug-arg.  */
1764 if (TARGET_DEBUG_ARG)
1766 fprintf (stderr, "\ninit_cumulative_args (");
1768 fprintf (stderr, "fntype code = %s, ret code = %s",
1769 tree_code_name[(int) TREE_CODE (fntype)],
1770 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1772 fprintf (stderr, "no fntype");
1775 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1780 /* Set up the number of registers to use for passing arguments. */
1781 cum->nregs = ix86_regparm;
1782 cum->sse_nregs = SSE_REGPARM_MAX;
/* A regparm attribute on the function type overrides -mregparm
   (32-bit only; the 64-bit ABI fixes register usage).  */
1783 if (fntype && !TARGET_64BIT)
1785 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1788 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1790 cum->maybe_vaarg = false;
1792 /* Use ecx and edx registers if function has fastcall attribute */
1793 if (fntype && !TARGET_64BIT)
1795 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1803 /* Determine if this function has variable arguments. This is
1804 indicated by the last argument being 'void_type_mode' if there
1805 are no variable arguments. If there are variable arguments, then
1806 we won't pass anything in registers */
1810 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1811 param != 0; param = next_param)
1813 next_param = TREE_CHAIN (param);
1814 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1821 cum->maybe_vaarg = true;
/* No prototype at all also means possibly variadic.  */
1825 if ((!fntype && !libname)
1826 || (fntype && !TYPE_ARG_TYPES (fntype)))
1827 cum->maybe_vaarg = 1;
1829 if (TARGET_DEBUG_ARG)
1830 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1835 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
1836 of this code is to classify each 8bytes of incoming argument by the register
1837 class and assign registers accordingly. */
1839 /* Return the union class of CLASS1 and CLASS2.
1840 See the x86-64 PS ABI for details.
   NOTE(review): several 'return' lines of this function appear dropped
   in this extract (the embedded line numbers jump after each rule);
   comments describe the visible classification rules only. */
1842 static enum x86_64_reg_class
1843 merge_classes (class1, class2)
1844 enum x86_64_reg_class class1, class2;
1846 /* Rule #1: If both classes are equal, this is the resulting class. */
1847 if (class1 == class2)
1850 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1852 if (class1 == X86_64_NO_CLASS)
1854 if (class2 == X86_64_NO_CLASS)
1857 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1858 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1859 return X86_64_MEMORY_CLASS;
1861 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
/* The SImode special case keeps the cheaper 32-bit move when an
   INTEGERSI half is merged with a single-precision SSE half.  */
1862 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1863 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1864 return X86_64_INTEGERSI_CLASS;
1865 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1866 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1867 return X86_64_INTEGER_CLASS;
1869 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1870 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1871 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1872 return X86_64_MEMORY_CLASS;
1874 /* Rule #6: Otherwise class SSE is used. */
1875 return X86_64_SSE_CLASS;
1878 /* Classify the argument of type TYPE and mode MODE.
1879 CLASSES will be filled by the register class used to pass each word
1880 of the operand. The number of words is returned. In case the parameter
1881 should be passed in memory, 0 is returned. As a special case for zero
1882 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1884 BIT_OFFSET is the bit offset of this argument within the enclosing
1885 object, taken modulo 256; it is used while recursing into records to
avoid overflow cases.
1887 See the x86-64 PS ABI for details.
/* NOTE(review): this extract elides many lines of the function body
   (declarations, braces and several statements are missing between the
   numbered lines below); the comments annotate only what is visible.  */
1891 classify_argument (mode, type, classes, bit_offset)
1892 enum machine_mode mode;
1894 enum x86_64_reg_class classes[MAX_CLASSES];
1898 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
/* Number of eightbytes the argument occupies, accounting for its
   sub-eightbyte starting offset.  */
1899 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1901 /* Variable sized entities are always passed/returned in memory. */
1905 if (type && AGGREGATE_TYPE_P (type))
1909 enum x86_64_reg_class subclasses[MAX_CLASSES];
1911 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1915 for (i = 0; i < words; i++)
1916 classes[i] = X86_64_NO_CLASS;
1918 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1919 signalize memory class, so handle it as special case. */
1922 classes[0] = X86_64_NO_CLASS;
1926 /* Classify each field of record and merge classes. */
1927 if (TREE_CODE (type) == RECORD_TYPE)
1929 /* For C++ classes, first merge in the fields of the base classes. */
1930 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1932 tree bases = TYPE_BINFO_BASETYPES (type);
1933 int n_bases = TREE_VEC_LENGTH (bases);
1936 for (i = 0; i < n_bases; ++i)
1938 tree binfo = TREE_VEC_ELT (bases, i);
/* Base offset converted from bytes to bits.  */
1940 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1941 tree type = BINFO_TYPE (binfo);
1943 num = classify_argument (TYPE_MODE (type),
1945 (offset + bit_offset) % 256);
1948 for (i = 0; i < num; i++)
/* Eightbyte index of the base within the enclosing record.  */
1950 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1952 merge_classes (subclasses[i], classes[i + pos]);
1956 /* And now merge the fields of structure. */
1957 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1959 if (TREE_CODE (field) == FIELD_DECL)
1963 /* Bitfields are always classified as integer. Handle them
1964 early, since later code would consider them to be
1965 misaligned integers. */
1966 if (DECL_BIT_FIELD (field))
1968 for (i = int_bit_position (field) / 8 / 8;
1969 i < (int_bit_position (field)
1970 + tree_low_cst (DECL_SIZE (field), 0)
1973 merge_classes (X86_64_INTEGER_CLASS,
1978 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1979 TREE_TYPE (field), subclasses,
1980 (int_bit_position (field)
1981 + bit_offset) % 256);
1984 for (i = 0; i < num; i++)
1987 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
1989 merge_classes (subclasses[i], classes[i + pos]);
1995 /* Arrays are handled as small records: classify the element type and
1996 replicate its classification over all eightbytes. */
1996 else if (TREE_CODE (type) == ARRAY_TYPE)
1999 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2000 TREE_TYPE (type), subclasses, bit_offset);
2004 /* The partial classes are now full classes. */
2005 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2006 subclasses[0] = X86_64_SSE_CLASS;
2007 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2008 subclasses[0] = X86_64_INTEGER_CLASS;
2010 for (i = 0; i < words; i++)
2011 classes[i] = subclasses[i % num];
2013 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2014 else if (TREE_CODE (type) == UNION_TYPE
2015 || TREE_CODE (type) == QUAL_UNION_TYPE)
2017 /* For classes first merge in the field of the subclasses. */
2018 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2020 tree bases = TYPE_BINFO_BASETYPES (type);
2021 int n_bases = TREE_VEC_LENGTH (bases);
2024 for (i = 0; i < n_bases; ++i)
2026 tree binfo = TREE_VEC_ELT (bases, i);
2028 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2029 tree type = BINFO_TYPE (binfo);
2031 num = classify_argument (TYPE_MODE (type),
2033 (offset + (bit_offset % 64)) % 256);
2036 for (i = 0; i < num; i++)
2038 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2040 merge_classes (subclasses[i], classes[i + pos]);
/* Union members all start at offset 0, so merge eightbyte i of every
   member into eightbyte i of the result.  */
2044 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2046 if (TREE_CODE (field) == FIELD_DECL)
2049 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2050 TREE_TYPE (field), subclasses,
2054 for (i = 0; i < num; i++)
2055 classes[i] = merge_classes (subclasses[i], classes[i]);
2062 /* Final merger cleanup. */
2063 for (i = 0; i < words; i++)
2065 /* If one class is MEMORY, everything should be passed in
2067 if (classes[i] == X86_64_MEMORY_CLASS)
2070 /* The X86_64_SSEUP_CLASS should be always preceded by
2071 X86_64_SSE_CLASS. */
2072 if (classes[i] == X86_64_SSEUP_CLASS
2073 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2074 classes[i] = X86_64_SSE_CLASS;
2076 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2077 if (classes[i] == X86_64_X87UP_CLASS
2078 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2079 classes[i] = X86_64_SSE_CLASS;
2084 /* Compute alignment needed. We align all types to natural boundaries with
2085 exception of XFmode that is aligned to 64bits. */
2086 if (mode != VOIDmode && mode != BLKmode)
2088 int mode_alignment = GET_MODE_BITSIZE (mode);
2091 mode_alignment = 128;
2092 else if (mode == XCmode)
2093 mode_alignment = 256;
2094 /* Misaligned fields are always returned in memory. */
2095 if (bit_offset % mode_alignment)
2099 /* Classification of atomic types. */
/* Sub-32-bit scalars at a low offset keep the SI-sized class.  */
2109 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2110 classes[0] = X86_64_INTEGERSI_CLASS;
2112 classes[0] = X86_64_INTEGER_CLASS;
2116 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2119 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2120 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2123 if (!(bit_offset % 64))
2124 classes[0] = X86_64_SSESF_CLASS;
2126 classes[0] = X86_64_SSE_CLASS;
2129 classes[0] = X86_64_SSEDF_CLASS;
2132 classes[0] = X86_64_X87_CLASS;
2133 classes[1] = X86_64_X87UP_CLASS;
2136 classes[0] = X86_64_X87_CLASS;
2137 classes[1] = X86_64_X87UP_CLASS;
2138 classes[2] = X86_64_X87_CLASS;
2139 classes[3] = X86_64_X87UP_CLASS;
2142 classes[0] = X86_64_SSEDF_CLASS;
2143 classes[1] = X86_64_SSEDF_CLASS;
2146 classes[0] = X86_64_SSE_CLASS;
2154 classes[0] = X86_64_SSE_CLASS;
2155 classes[1] = X86_64_SSEUP_CLASS;
2170 /* Examine the argument and return set number of register required in each
2171 class. Return 0 iff parameter should be passed in memory. */
2173 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
2174 enum machine_mode mode;
2176 int *int_nregs, *sse_nregs;
2179 enum x86_64_reg_class class[MAX_CLASSES];
2180 int n = classify_argument (mode, type, class, 0);
2186 for (n--; n >= 0; n--)
2189 case X86_64_INTEGER_CLASS:
2190 case X86_64_INTEGERSI_CLASS:
2193 case X86_64_SSE_CLASS:
2194 case X86_64_SSESF_CLASS:
2195 case X86_64_SSEDF_CLASS:
2198 case X86_64_NO_CLASS:
2199 case X86_64_SSEUP_CLASS:
2201 case X86_64_X87_CLASS:
2202 case X86_64_X87UP_CLASS:
2206 case X86_64_MEMORY_CLASS:
2211 /* Construct container for the argument used by GCC interface. See
2212 FUNCTION_ARG for the detailed description. */
/* NOTE(review): this extract elides many lines of the function body;
   comments below annotate only the statements that are visible.  */
2214 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
2215 enum machine_mode mode;
2218 int nintregs, nsseregs;
2222 enum machine_mode tmpmode;
2224 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2225 enum x86_64_reg_class class[MAX_CLASSES];
2229 int needed_sseregs, needed_intregs;
2230 rtx exp[MAX_CLASSES];
2233 n = classify_argument (mode, type, class, 0);
2234 if (TARGET_DEBUG_ARG)
2237 fprintf (stderr, "Memory class\n");
2240 fprintf (stderr, "Classes:");
2241 for (i = 0; i < n; i++)
2243 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2245 fprintf (stderr, "\n");
/* Give up when the argument needs memory, or needs more registers of
   either kind than remain available.  */
2250 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2252 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2255 /* First construct simple cases. Avoid SCmode, since we want to use
2256 single register to pass this type. */
2257 if (n == 1 && mode != SCmode)
2260 case X86_64_INTEGER_CLASS:
2261 case X86_64_INTEGERSI_CLASS:
2262 return gen_rtx_REG (mode, intreg[0]);
2263 case X86_64_SSE_CLASS:
2264 case X86_64_SSESF_CLASS:
2265 case X86_64_SSEDF_CLASS:
2266 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2267 case X86_64_X87_CLASS:
2268 return gen_rtx_REG (mode, FIRST_STACK_REG);
2269 case X86_64_NO_CLASS:
2270 /* Zero sized array, struct or class. */
/* Two-eightbyte aggregates that map onto a single hard register.  */
2275 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
2276 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2278 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2279 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
2280 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2281 && class[1] == X86_64_INTEGER_CLASS
2282 && (mode == CDImode || mode == TImode)
2283 && intreg[0] + 1 == intreg[1])
2284 return gen_rtx_REG (mode, intreg[0]);
2286 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2287 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2288 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
2290 /* Otherwise figure out the entries of the PARALLEL. */
2291 for (i = 0; i < n; i++)
2295 case X86_64_NO_CLASS:
2297 case X86_64_INTEGER_CLASS:
2298 case X86_64_INTEGERSI_CLASS:
2299 /* Merge TImodes on aligned occasions here too. */
/* A final partial eightbyte is given the smallest integer mode that
   still covers the remaining bytes.  */
2300 if (i * 8 + 8 > bytes)
2301 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2302 else if (class[i] == X86_64_INTEGERSI_CLASS)
2306 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2307 if (tmpmode == BLKmode)
2309 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2310 gen_rtx_REG (tmpmode, *intreg),
2314 case X86_64_SSESF_CLASS:
2315 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2316 gen_rtx_REG (SFmode,
2317 SSE_REGNO (sse_regno)),
2321 case X86_64_SSEDF_CLASS:
2322 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2323 gen_rtx_REG (DFmode,
2324 SSE_REGNO (sse_regno)),
2328 case X86_64_SSE_CLASS:
/* SSE followed by SSEUP forms one 128-bit (TImode) register.  */
2329 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2333 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2334 gen_rtx_REG (tmpmode,
2335 SSE_REGNO (sse_regno)),
2337 if (tmpmode == TImode)
/* Wrap the collected (reg, offset) pairs into a PARALLEL.  */
2345 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2346 for (i = 0; i < nexps; i++)
2347 XVECEXP (ret, 0, i) = exp [i];
2351 /* Update the data in CUM to advance over an argument
2352 of mode MODE and data type TYPE.
2353 (TYPE is null for libcalls where that information may not be available.) */
/* NOTE(review): lines are elided in this extract — the branch structure
   separating the 64-bit, SSE-TImode and plain 32-bit paths is missing.  */
2356 function_arg_advance (cum, mode, type, named)
2357 CUMULATIVE_ARGS *cum; /* current arg information */
2358 enum machine_mode mode; /* current arg mode */
2359 tree type; /* type of the argument or 0 if lib support */
2360 int named; /* whether or not the argument was named */
2363 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2364 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2366 if (TARGET_DEBUG_ARG)
2368 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2369 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
/* x86-64 path: consume whole registers of both classes, or fall back
   to the stack when either class would be exhausted.  */
2372 int int_nregs, sse_nregs;
2373 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2374 cum->words += words;
2375 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2377 cum->nregs -= int_nregs;
2378 cum->sse_nregs -= sse_nregs;
2379 cum->regno += int_nregs;
2380 cum->sse_regno += sse_nregs;
2383 cum->words += words;
/* 32-bit path: TImode vectors go in SSE registers when available.  */
2387 if (TARGET_SSE && mode == TImode)
2389 cum->sse_words += words;
2390 cum->sse_nregs -= 1;
2391 cum->sse_regno += 1;
2392 if (cum->sse_nregs <= 0)
/* 32-bit integer-register path.  */
2400 cum->words += words;
2401 cum->nregs -= words;
2402 cum->regno += words;
2404 if (cum->nregs <= 0)
2414 /* Define where to put the arguments to a function.
2415 Value is zero to push the argument on the stack,
2416 or a hard register in which to store the argument.
2418 MODE is the argument's machine mode.
2419 TYPE is the data type of the argument (as a tree).
2420 This is null for libcalls where that information may
2422 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2423 the preceding args and about the function being called.
2424 NAMED is nonzero if this argument is a named parameter
2425 (otherwise it is an extra parameter matching an ellipsis). */
/* NOTE(review): lines are elided in this extract; the branches selecting
   between the 64-bit and 32-bit paths are not fully visible.  */
2428 function_arg (cum, mode, type, named)
2429 CUMULATIVE_ARGS *cum; /* current arg information */
2430 enum machine_mode mode; /* current arg mode */
2431 tree type; /* type of the argument or 0 if lib support */
2432 int named; /* != 0 for normal args, == 0 for ... args */
2436 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2437 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2439 /* Handle a hidden AL argument containing number of registers for varargs
2440 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2442 if (mode == VOIDmode)
2445 return GEN_INT (cum->maybe_vaarg
2446 ? (cum->sse_nregs < 0
/* 64-bit path: let construct_container build the REG/PARALLEL.  */
2454 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2455 &x86_64_int_parameter_registers [cum->regno],
2460 /* For now, pass fp/complex values on the stack. */
2469 if (words <= cum->nregs)
2471 int regno = cum->regno;
2473 /* Fastcall allocates the first two DWORD (SImode) or
2474 smaller arguments to ECX and EDX. */
2477 if (mode == BLKmode || mode == DImode)
2480 /* ECX not EAX is the first allocated register. */
2484 ret = gen_rtx_REG (mode, regno);
2489 ret = gen_rtx_REG (mode, cum->sse_regno);
2493 if (TARGET_DEBUG_ARG)
2496 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2497 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2500 print_simple_rtl (stderr, ret);
2502 fprintf (stderr, ", stack");
2504 fprintf (stderr, " )\n");
2510 /* A C expression that indicates when an argument must be passed by
2511 reference. If nonzero for an argument, a copy of that argument is
2512 made in memory and a pointer to the argument is passed instead of
2513 the argument itself. The pointer is passed in whatever way is
2514 appropriate for passing a pointer to that type. */
2517 function_arg_pass_by_reference (cum, mode, type, named)
2518 CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
2519 enum machine_mode mode ATTRIBUTE_UNUSED;
2521 int named ATTRIBUTE_UNUSED;
2526 if (type && int_size_in_bytes (type) == -1)
2528 if (TARGET_DEBUG_ARG)
2529 fprintf (stderr, "function_arg_pass_by_reference\n");
2536 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2540 ix86_function_arg_boundary (mode, type)
2541 enum machine_mode mode;
2546 return PARM_BOUNDARY;
2548 align = TYPE_ALIGN (type);
2550 align = GET_MODE_ALIGNMENT (mode);
2551 if (align < PARM_BOUNDARY)
2552 align = PARM_BOUNDARY;
2558 /* Return true if N is a possible register number of function value. */
2560 ix86_function_value_regno_p (regno)
2565 return ((regno) == 0
2566 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2567 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2569 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2570 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2571 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2574 /* Define how to find the value returned by a function.
2575 VALTYPE is the data type of the value (as a tree).
2576 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2577 otherwise, FUNC is 0. */
2579 ix86_function_value (valtype)
2584 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2585 REGPARM_MAX, SSE_REGPARM_MAX,
2586 x86_64_int_return_registers, 0);
2587 /* For zero sized structures, construct_container return NULL, but we need
2588 to keep rest of compiler happy by returning meaningful value. */
2590 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2594 return gen_rtx_REG (TYPE_MODE (valtype),
2595 ix86_value_regno (TYPE_MODE (valtype)));
2598 /* Return false iff type is returned in memory. */
2600 ix86_return_in_memory (type)
2603 int needed_intregs, needed_sseregs;
2606 return !examine_argument (TYPE_MODE (type), type, 1,
2607 &needed_intregs, &needed_sseregs);
2611 if (TYPE_MODE (type) == BLKmode)
2613 else if (MS_AGGREGATE_RETURN
2614 && AGGREGATE_TYPE_P (type)
2615 && int_size_in_bytes(type) <= 8)
2617 else if ((VECTOR_MODE_P (TYPE_MODE (type))
2618 && int_size_in_bytes (type) == 8)
2619 || (int_size_in_bytes (type) > 12
2620 && TYPE_MODE (type) != TImode
2621 && TYPE_MODE (type) != TFmode
2622 && !VECTOR_MODE_P (TYPE_MODE (type))))
2628 /* Define how to find the value returned by a library function
2629 assuming the value has mode MODE. */
2631 ix86_libcall_value (mode)
2632 enum machine_mode mode;
2642 return gen_rtx_REG (mode, FIRST_SSE_REG);
2645 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2647 return gen_rtx_REG (mode, 0);
2651 return gen_rtx_REG (mode, ix86_value_regno (mode));
2654 /* Given a mode, return the register to use for a return value. */
2657 ix86_value_regno (mode)
2658 enum machine_mode mode;
2660 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2661 return FIRST_FLOAT_REG;
2662 if (mode == TImode || VECTOR_MODE_P (mode))
2663 return FIRST_SSE_REG;
2667 /* Create the va_list data type. */
2670 ix86_build_va_list ()
2672 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2674 /* For i386 we use plain pointer to argument area. */
2676 return build_pointer_type (char_type_node);
2678 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2679 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2681 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2682 unsigned_type_node);
2683 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2684 unsigned_type_node);
2685 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2687 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2690 DECL_FIELD_CONTEXT (f_gpr) = record;
2691 DECL_FIELD_CONTEXT (f_fpr) = record;
2692 DECL_FIELD_CONTEXT (f_ovf) = record;
2693 DECL_FIELD_CONTEXT (f_sav) = record;
2695 TREE_CHAIN (record) = type_decl;
2696 TYPE_NAME (record) = type_decl;
2697 TYPE_FIELDS (record) = f_gpr;
2698 TREE_CHAIN (f_gpr) = f_fpr;
2699 TREE_CHAIN (f_fpr) = f_ovf;
2700 TREE_CHAIN (f_ovf) = f_sav;
2702 layout_type (record);
2704 /* The correct type is an array type of one element. */
2705 return build_array_type (record, build_index_type (size_zero_node));
2708 /* Perform any needed actions needed for a function that is receiving a
2709 variable number of arguments.
2713 MODE and TYPE are the mode and type of the current parameter.
2715 PRETEND_SIZE is a variable that should be set to the amount of stack
2716 that must be pushed by the prolog to pretend that our caller pushed
2719 Normally, this macro will push all remaining incoming registers on the
2720 stack and set PRETEND_SIZE to the length of the registers pushed. */
/* NOTE(review): lines are elided in this extract (declarations, braces
   and early-exit tests are missing between the numbered lines).  */
2723 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2724 CUMULATIVE_ARGS *cum;
2725 enum machine_mode mode;
2727 int *pretend_size ATTRIBUTE_UNUSED;
2731 CUMULATIVE_ARGS next_cum;
2732 rtx save_area = NULL_RTX, mem;
2745 /* Indicate to allocate space on the stack for varargs save area. */
2746 ix86_save_varrargs_registers = 1;
2748 fntype = TREE_TYPE (current_function_decl);
2749 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2750 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2751 != void_type_node));
2753 /* For varargs, we do not want to skip the dummy va_dcl argument.
2754 For stdargs, we do want to skip the last named argument. */
2757 function_arg_advance (&next_cum, mode, type, 1);
2760 save_area = frame_pointer_rtx;
2762 set = get_varargs_alias_set ();
/* Spill the remaining unnamed integer parameter registers into the
   save area.  */
2764 for (i = next_cum.regno; i < ix86_regparm; i++)
2766 mem = gen_rtx_MEM (Pmode,
2767 plus_constant (save_area, i * UNITS_PER_WORD));
2768 set_mem_alias_set (mem, set);
2769 emit_move_insn (mem, gen_rtx_REG (Pmode,
2770 x86_64_int_parameter_registers[i]));
2773 if (next_cum.sse_nregs)
2775 /* Now emit code to save SSE registers. The AX parameter contains number
2776 of SSE parameter registers used to call this function. We use
2777 sse_prologue_save insn template that produces computed jump across
2778 SSE saves. We need some preparation work to get this working. */
2780 label = gen_label_rtx ();
2781 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2783 /* Compute address to jump to :
2784 label - 5*eax + nnamed_sse_arguments*5 */
2785 tmp_reg = gen_reg_rtx (Pmode);
2786 nsse_reg = gen_reg_rtx (Pmode);
/* AL holds the number of SSE registers actually used by the caller.  */
2787 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2788 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2789 gen_rtx_MULT (Pmode, nsse_reg,
2791 if (next_cum.sse_regno)
2794 gen_rtx_CONST (DImode,
2795 gen_rtx_PLUS (DImode,
2797 GEN_INT (next_cum.sse_regno * 4))));
2799 emit_move_insn (nsse_reg, label_ref);
2800 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2802 /* Compute address of memory block we save into. We always use pointer
2803 pointing 127 bytes after first byte to store - this is needed to keep
2804 instruction size limited by 4 bytes. */
2805 tmp_reg = gen_reg_rtx (Pmode);
2806 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2807 plus_constant (save_area,
2808 8 * REGPARM_MAX + 127)));
2809 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2810 set_mem_alias_set (mem, set)
2811 set_mem_align (mem, BITS_PER_WORD);
2813 /* And finally do the dirty job! */
2814 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2815 GEN_INT (next_cum.sse_regno), label));
2820 /* Implement va_start. */
/* NOTE(review): some lines are elided in this extract (parameter
   declarations, braces and the TARGET_64BIT test are missing).  */
2823 ix86_va_start (valist, nextarg)
2827 HOST_WIDE_INT words, n_gpr, n_fpr;
2828 tree f_gpr, f_fpr, f_ovf, f_sav;
2829 tree gpr, fpr, ovf, sav, t;
2831 /* Only 64bit target needs something special. */
2834 std_expand_builtin_va_start (valist, nextarg);
/* Walk the four fields of the __va_list_tag record built by
   ix86_build_va_list.  */
2838 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2839 f_fpr = TREE_CHAIN (f_gpr);
2840 f_ovf = TREE_CHAIN (f_fpr);
2841 f_sav = TREE_CHAIN (f_ovf);
2843 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2844 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2845 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2846 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2847 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2849 /* Count number of gp and fp argument registers used. */
2850 words = current_function_args_info.words;
2851 n_gpr = current_function_args_info.regno;
2852 n_fpr = current_function_args_info.sse_regno;
2854 if (TARGET_DEBUG_ARG)
2855 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2856 (int) words, (int) n_gpr, (int) n_fpr);
/* gp_offset: bytes into the save area already consumed by named
   integer args (8 bytes per register).  */
2858 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2859 build_int_2 (n_gpr * 8, 0));
2860 TREE_SIDE_EFFECTS (t) = 1;
2861 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
/* fp_offset: SSE slots (16 bytes each) start after the 8*REGPARM_MAX
   bytes of integer slots.  */
2863 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2864 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2865 TREE_SIDE_EFFECTS (t) = 1;
2866 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2868 /* Find the overflow area. */
2869 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2871 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2872 build_int_2 (words * UNITS_PER_WORD, 0));
2873 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2874 TREE_SIDE_EFFECTS (t) = 1;
2875 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2877 /* Find the register save area.
2878 Prologue of the function save it right above stack frame. */
2879 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2880 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2881 TREE_SIDE_EFFECTS (t) = 1;
2882 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2885 /* Implement va_arg. */
/* NOTE(review): many lines of the body are elided in this extract;
   comments annotate only the statements that are visible.  */
2887 ix86_va_arg (valist, type)
2890 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2891 tree f_gpr, f_fpr, f_ovf, f_sav;
2892 tree gpr, fpr, ovf, sav, t;
2894 rtx lab_false, lab_over = NULL_RTX;
2899 /* Only 64bit target needs something special. */
2902 return std_expand_builtin_va_arg (valist, type);
2905 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2906 f_fpr = TREE_CHAIN (f_gpr);
2907 f_ovf = TREE_CHAIN (f_fpr);
2908 f_sav = TREE_CHAIN (f_ovf);
2910 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2911 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2912 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2913 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2914 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2916 size = int_size_in_bytes (type);
2919 /* Passed by reference: fetch a pointer instead of the object.  */
2921 type = build_pointer_type (type);
2922 size = int_size_in_bytes (type);
2924 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2926 container = construct_container (TYPE_MODE (type), type, 0,
2927 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2929 * Pull the value out of the saved registers ...
2932 addr_rtx = gen_reg_rtx (Pmode);
2936 rtx int_addr_rtx, sse_addr_rtx;
2937 int needed_intregs, needed_sseregs;
2940 lab_over = gen_label_rtx ();
2941 lab_false = gen_label_rtx ();
2943 examine_argument (TYPE_MODE (type), type, 0,
2944 &needed_intregs, &needed_sseregs);
/* Over-aligned values cannot be read directly from the save area.  */
2947 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2948 || TYPE_ALIGN (type) > 128);
2950 /* In case we are passing structure, verify that it is consecutive block
2951 on the register save area. If not we need to do moves. */
2952 if (!need_temp && !REG_P (container))
2954 /* Verify that all registers are strictly consecutive */
2955 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2959 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2961 rtx slot = XVECEXP (container, 0, i);
2962 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
2963 || INTVAL (XEXP (slot, 1)) != i * 16)
2971 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2973 rtx slot = XVECEXP (container, 0, i);
2974 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
2975 || INTVAL (XEXP (slot, 1)) != i * 8)
2982 int_addr_rtx = addr_rtx;
2983 sse_addr_rtx = addr_rtx;
2987 int_addr_rtx = gen_reg_rtx (Pmode);
2988 sse_addr_rtx = gen_reg_rtx (Pmode);
2990 /* First ensure that we fit completely in registers. */
2993 emit_cmp_and_jump_insns (expand_expr
2994 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2995 GEN_INT ((REGPARM_MAX - needed_intregs +
2996 1) * 8), GE, const1_rtx, SImode,
3001 emit_cmp_and_jump_insns (expand_expr
3002 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
3003 GEN_INT ((SSE_REGPARM_MAX -
3004 needed_sseregs + 1) * 16 +
3005 REGPARM_MAX * 8), GE, const1_rtx,
3006 SImode, 1, lab_false);
3009 /* Compute index to start of area used for integer regs. */
3012 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
3013 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
3014 if (r != int_addr_rtx)
3015 emit_move_insn (int_addr_rtx, r);
3019 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
3020 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
3021 if (r != sse_addr_rtx)
3022 emit_move_insn (sse_addr_rtx, r);
3029 /* Never use the memory itself, as it has the alias set. */
3030 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
3031 mem = gen_rtx_MEM (BLKmode, addr_rtx);
3032 set_mem_alias_set (mem, get_varargs_alias_set ());
3033 set_mem_align (mem, BITS_PER_UNIT);
/* Copy each piece from its register save slot into the temporary.  */
3035 for (i = 0; i < XVECLEN (container, 0); i++)
3037 rtx slot = XVECEXP (container, 0, i);
3038 rtx reg = XEXP (slot, 0);
3039 enum machine_mode mode = GET_MODE (reg);
3045 if (SSE_REGNO_P (REGNO (reg)))
3047 src_addr = sse_addr_rtx;
3048 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3052 src_addr = int_addr_rtx;
3053 src_offset = REGNO (reg) * 8;
3055 src_mem = gen_rtx_MEM (mode, src_addr);
3056 set_mem_alias_set (src_mem, get_varargs_alias_set ());
3057 src_mem = adjust_address (src_mem, mode, src_offset);
3058 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
3059 emit_move_insn (dest_mem, src_mem);
/* Bump gp_offset / fp_offset past the registers just consumed.  */
3066 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3067 build_int_2 (needed_intregs * 8, 0));
3068 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3069 TREE_SIDE_EFFECTS (t) = 1;
3070 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3075 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3076 build_int_2 (needed_sseregs * 16, 0));
3077 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3078 TREE_SIDE_EFFECTS (t) = 1;
3079 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3082 emit_jump_insn (gen_jump (lab_over));
3084 emit_label (lab_false);
3087 /* ... otherwise out of the overflow area. */
3089 /* Care for on-stack alignment if needed. */
3090 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3094 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3095 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3096 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3100 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3102 emit_move_insn (addr_rtx, r);
/* Advance the overflow pointer past the fetched argument.  */
3105 build (PLUS_EXPR, TREE_TYPE (t), t,
3106 build_int_2 (rsize * UNITS_PER_WORD, 0));
3107 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3108 TREE_SIDE_EFFECTS (t) = 1;
3109 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3112 emit_label (lab_over);
/* For pass-by-reference, dereference the pointer we just fetched.  */
3116 r = gen_rtx_MEM (Pmode, addr_rtx);
3117 set_mem_alias_set (r, get_varargs_alias_set ());
3118 emit_move_insn (addr_rtx, r);
3124 /* Return nonzero if OP is either a i387 or SSE fp register. */
3126 any_fp_register_operand (op, mode)
3128 enum machine_mode mode ATTRIBUTE_UNUSED;
3130 return ANY_FP_REG_P (op);
3133 /* Return nonzero if OP is an i387 fp register. */
3135 fp_register_operand (op, mode)
3137 enum machine_mode mode ATTRIBUTE_UNUSED;
3139 return FP_REG_P (op);
3142 /* Return nonzero if OP is a non-fp register_operand. */
3144 register_and_not_any_fp_reg_operand (op, mode)
3146 enum machine_mode mode;
3148 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3151 /* Return nonzero if OP is a register operand other than an
3152 i387 fp register. */
3154 register_and_not_fp_reg_operand (op, mode)
3156 enum machine_mode mode;
3158 return register_operand (op, mode) && !FP_REG_P (op);
3161 /* Return nonzero if OP is general operand representable on x86_64. */
3164 x86_64_general_operand (op, mode)
3166 enum machine_mode mode;
3169 return general_operand (op, mode);
3170 if (nonimmediate_operand (op, mode))
3172 return x86_64_sign_extended_value (op);
3175 /* Return nonzero if OP is general operand representable on x86_64
3176 as either sign extended or zero extended constant. */
3179 x86_64_szext_general_operand (op, mode)
3181 enum machine_mode mode;
3184 return general_operand (op, mode);
3185 if (nonimmediate_operand (op, mode))
3187 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3190 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3193 x86_64_nonmemory_operand (op, mode)
3195 enum machine_mode mode;
3198 return nonmemory_operand (op, mode);
3199 if (register_operand (op, mode))
3201 return x86_64_sign_extended_value (op);
3204 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
3207 x86_64_movabs_operand (op, mode)
3209 enum machine_mode mode;
3211 if (!TARGET_64BIT || !flag_pic)
3212 return nonmemory_operand (op, mode);
3213 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3215 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3220 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3223 x86_64_szext_nonmemory_operand (op, mode)
3225 enum machine_mode mode;
3228 return nonmemory_operand (op, mode);
3229 if (register_operand (op, mode))
3231 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3234 /* Return nonzero if OP is immediate operand representable on x86_64. */
3237 x86_64_immediate_operand (op, mode)
3239 enum machine_mode mode;
3242 return immediate_operand (op, mode);
3243 return x86_64_sign_extended_value (op);
3246 /* Return nonzero if OP is immediate operand representable on x86_64. */
3249 x86_64_zext_immediate_operand (op, mode)
3251 enum machine_mode mode ATTRIBUTE_UNUSED;
3253 return x86_64_zero_extended_value (op);
3256 /* Return nonzero if OP is (const_int 1), else return zero. */
3259 const_int_1_operand (op, mode)
3261 enum machine_mode mode ATTRIBUTE_UNUSED;
3263 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
3266 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3267 for shift & compare patterns, as shifting by 0 does not change flags),
3268 else return zero. */
3271 const_int_1_31_operand (op, mode)
3273 enum machine_mode mode ATTRIBUTE_UNUSED;
3275 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3278 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3279 reference and a constant. */
3282 symbolic_operand (op, mode)
3284 enum machine_mode mode ATTRIBUTE_UNUSED;
3286 switch (GET_CODE (op))
3294 if (GET_CODE (op) == SYMBOL_REF
3295 || GET_CODE (op) == LABEL_REF
3296 || (GET_CODE (op) == UNSPEC
3297 && (XINT (op, 1) == UNSPEC_GOT
3298 || XINT (op, 1) == UNSPEC_GOTOFF
3299 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3301 if (GET_CODE (op) != PLUS
3302 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3306 if (GET_CODE (op) == SYMBOL_REF
3307 || GET_CODE (op) == LABEL_REF)
3309 /* Only @GOTOFF gets offsets. */
3310 if (GET_CODE (op) != UNSPEC
3311 || XINT (op, 1) != UNSPEC_GOTOFF)
3314 op = XVECEXP (op, 0, 0);
3315 if (GET_CODE (op) == SYMBOL_REF
3316 || GET_CODE (op) == LABEL_REF)
3325 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3328 pic_symbolic_operand (op, mode)
3330 enum machine_mode mode ATTRIBUTE_UNUSED;
3332 if (GET_CODE (op) != CONST)
3337 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
3342 if (GET_CODE (op) == UNSPEC)
3344 if (GET_CODE (op) != PLUS
3345 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3348 if (GET_CODE (op) == UNSPEC)
3354 /* Return true if OP is a symbolic operand that resolves locally. */
3357 local_symbolic_operand (op, mode)
3359 enum machine_mode mode ATTRIBUTE_UNUSED;
3361 if (GET_CODE (op) == CONST
3362 && GET_CODE (XEXP (op, 0)) == PLUS
3363 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3364 op = XEXP (XEXP (op, 0), 0);
3366 if (GET_CODE (op) == LABEL_REF)
3369 if (GET_CODE (op) != SYMBOL_REF)
3372 /* These we've been told are local by varasm and encode_section_info
3374 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
3377 /* There is, however, a not insubstantial body of code in the rest of
3378 the compiler that assumes it can just stick the results of
3379 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3380 /* ??? This is a hack. Should update the body of the compiler to
3381 always create a DECL an invoke targetm.encode_section_info. */
3382 if (strncmp (XSTR (op, 0), internal_label_prefix,
3383 internal_label_prefix_len) == 0)
3389 /* Test for various thread-local symbols. See ix86_encode_section_info. */
3392 tls_symbolic_operand (op, mode)
3394 enum machine_mode mode ATTRIBUTE_UNUSED;
3396 const char *symbol_str;
3398 if (GET_CODE (op) != SYMBOL_REF)
3400 symbol_str = XSTR (op, 0);
3402 if (symbol_str[0] != '%')
3404 return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
3408 tls_symbolic_operand_1 (op, kind)
3410 enum tls_model kind;
3412 const char *symbol_str;
3414 if (GET_CODE (op) != SYMBOL_REF)
3416 symbol_str = XSTR (op, 0);
3418 return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
3422 global_dynamic_symbolic_operand (op, mode)
3424 enum machine_mode mode ATTRIBUTE_UNUSED;
3426 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3430 local_dynamic_symbolic_operand (op, mode)
3432 enum machine_mode mode ATTRIBUTE_UNUSED;
3434 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3438 initial_exec_symbolic_operand (op, mode)
3440 enum machine_mode mode ATTRIBUTE_UNUSED;
3442 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3446 local_exec_symbolic_operand (op, mode)
3448 enum machine_mode mode ATTRIBUTE_UNUSED;
3450 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3453 /* Test for a valid operand for a call instruction. Don't allow the
3454 arg pointer register or virtual regs since they may decay into
3455 reg + const, which the patterns can't handle. */
3458 call_insn_operand (op, mode)
3460 enum machine_mode mode ATTRIBUTE_UNUSED;
3462 /* Disallow indirect through a virtual register. This leads to
3463 compiler aborts when trying to eliminate them. */
3464 if (GET_CODE (op) == REG
3465 && (op == arg_pointer_rtx
3466 || op == frame_pointer_rtx
3467 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3468 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3471 /* Disallow `call 1234'. Due to varying assembler lameness this
3472 gets either rejected or translated to `call .+1234'. */
3473 if (GET_CODE (op) == CONST_INT)
3476 /* Explicitly allow SYMBOL_REF even if pic. */
3477 if (GET_CODE (op) == SYMBOL_REF)
3480 /* Otherwise we can allow any general_operand in the address. */
3481 return general_operand (op, Pmode);
3484 /* Test for a valid operand for a call instruction. Don't allow the
3485 arg pointer register or virtual regs since they may decay into
3486 reg + const, which the patterns can't handle. */
3489 sibcall_insn_operand (op, mode)
3491 enum machine_mode mode ATTRIBUTE_UNUSED;
3493 /* Disallow indirect through a virtual register. This leads to
3494 compiler aborts when trying to eliminate them. */
3495 if (GET_CODE (op) == REG
3496 && (op == arg_pointer_rtx
3497 || op == frame_pointer_rtx
3498 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3499 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3502 /* Explicitly allow SYMBOL_REF even if pic. */
3503 if (GET_CODE (op) == SYMBOL_REF)
3506 /* Otherwise we can only allow register operands. */
3507 return register_operand (op, Pmode);
3511 constant_call_address_operand (op, mode)
3513 enum machine_mode mode ATTRIBUTE_UNUSED;
3515 if (GET_CODE (op) == CONST
3516 && GET_CODE (XEXP (op, 0)) == PLUS
3517 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3518 op = XEXP (XEXP (op, 0), 0);
3519 return GET_CODE (op) == SYMBOL_REF;
3522 /* Match exactly zero and one. */
3525 const0_operand (op, mode)
3527 enum machine_mode mode;
3529 return op == CONST0_RTX (mode);
3533 const1_operand (op, mode)
3535 enum machine_mode mode ATTRIBUTE_UNUSED;
3537 return op == const1_rtx;
3540 /* Match 2, 4, or 8. Used for leal multiplicands. */
3543 const248_operand (op, mode)
3545 enum machine_mode mode ATTRIBUTE_UNUSED;
3547 return (GET_CODE (op) == CONST_INT
3548 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3551 /* True if this is a constant appropriate for an increment or decrement. */
3554 incdec_operand (op, mode)
3556 enum machine_mode mode ATTRIBUTE_UNUSED;
3558 /* On Pentium4, the inc and dec operations causes extra dependency on flag
3559 registers, since carry flag is not set. */
3560 if (TARGET_PENTIUM4 && !optimize_size)
3562 return op == const1_rtx || op == constm1_rtx;
3565 /* Return nonzero if OP is acceptable as operand of DImode shift
3569 shiftdi_operand (op, mode)
3571 enum machine_mode mode ATTRIBUTE_UNUSED;
3574 return nonimmediate_operand (op, mode);
3576 return register_operand (op, mode);
3579 /* Return false if this is the stack pointer, or any other fake
3580 register eliminable to the stack pointer. Otherwise, this is
3583 This is used to prevent esp from being used as an index reg.
3584 Which would only happen in pathological cases. */
3587 reg_no_sp_operand (op, mode)
3589 enum machine_mode mode;
3592 if (GET_CODE (t) == SUBREG)
3594 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3597 return register_operand (op, mode);
3601 mmx_reg_operand (op, mode)
3603 enum machine_mode mode ATTRIBUTE_UNUSED;
3605 return MMX_REG_P (op);
3608 /* Return false if this is any eliminable register. Otherwise
3612 general_no_elim_operand (op, mode)
3614 enum machine_mode mode;
3617 if (GET_CODE (t) == SUBREG)
3619 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3620 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3621 || t == virtual_stack_dynamic_rtx)
3624 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3625 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3628 return general_operand (op, mode);
3631 /* Return false if this is any eliminable register. Otherwise
3632 register_operand or const_int. */
3635 nonmemory_no_elim_operand (op, mode)
3637 enum machine_mode mode;
3640 if (GET_CODE (t) == SUBREG)
3642 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3643 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3644 || t == virtual_stack_dynamic_rtx)
3647 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3650 /* Return false if this is any eliminable register or stack register,
3651 otherwise work like register_operand. */
3654 index_register_operand (op, mode)
3656 enum machine_mode mode;
3659 if (GET_CODE (t) == SUBREG)
3663 if (t == arg_pointer_rtx
3664 || t == frame_pointer_rtx
3665 || t == virtual_incoming_args_rtx
3666 || t == virtual_stack_vars_rtx
3667 || t == virtual_stack_dynamic_rtx
3668 || REGNO (t) == STACK_POINTER_REGNUM)
3671 return general_operand (op, mode);
3674 /* Return true if op is a Q_REGS class register. */
3677 q_regs_operand (op, mode)
3679 enum machine_mode mode;
3681 if (mode != VOIDmode && GET_MODE (op) != mode)
3683 if (GET_CODE (op) == SUBREG)
3684 op = SUBREG_REG (op);
3685 return ANY_QI_REG_P (op);
3688 /* Return true if op is an flags register. */
3691 flags_reg_operand (op, mode)
3693 enum machine_mode mode;
3695 if (mode != VOIDmode && GET_MODE (op) != mode)
3697 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3700 /* Return true if op is a NON_Q_REGS class register. */
3703 non_q_regs_operand (op, mode)
3705 enum machine_mode mode;
3707 if (mode != VOIDmode && GET_MODE (op) != mode)
3709 if (GET_CODE (op) == SUBREG)
3710 op = SUBREG_REG (op);
3711 return NON_QI_REG_P (op);
3715 zero_extended_scalar_load_operand (op, mode)
3717 enum machine_mode mode ATTRIBUTE_UNUSED;
3720 if (GET_CODE (op) != MEM)
3722 op = maybe_get_pool_constant (op);
3725 if (GET_CODE (op) != CONST_VECTOR)
3728 (GET_MODE_SIZE (GET_MODE (op)) /
3729 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
3730 for (n_elts--; n_elts > 0; n_elts--)
3732 rtx elt = CONST_VECTOR_ELT (op, n_elts);
3733 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
3739 /* Return 1 when OP is operand acceptable for standard SSE move. */
3741 vector_move_operand (op, mode)
3743 enum machine_mode mode;
3745 if (nonimmediate_operand (op, mode))
3747 if (GET_MODE (op) != mode && mode != VOIDmode)
3749 return (op == CONST0_RTX (GET_MODE (op)));
3752 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3755 sse_comparison_operator (op, mode)
3757 enum machine_mode mode ATTRIBUTE_UNUSED;
3759 enum rtx_code code = GET_CODE (op);
3762 /* Operations supported directly. */
3772 /* These are equivalent to ones above in non-IEEE comparisons. */
3779 return !TARGET_IEEE_FP;
3784 /* Return 1 if OP is a valid comparison operator in valid mode. */
3786 ix86_comparison_operator (op, mode)
3788 enum machine_mode mode;
3790 enum machine_mode inmode;
3791 enum rtx_code code = GET_CODE (op);
3792 if (mode != VOIDmode && GET_MODE (op) != mode)
3794 if (GET_RTX_CLASS (code) != '<')
3796 inmode = GET_MODE (XEXP (op, 0));
3798 if (inmode == CCFPmode || inmode == CCFPUmode)
3800 enum rtx_code second_code, bypass_code;
3801 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3802 return (bypass_code == NIL && second_code == NIL);
3809 if (inmode == CCmode || inmode == CCGCmode
3810 || inmode == CCGOCmode || inmode == CCNOmode)
3813 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3814 if (inmode == CCmode)
3818 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3826 /* Return 1 if OP is a valid comparison operator testing carry flag
3829 ix86_carry_flag_operator (op, mode)
3831 enum machine_mode mode;
3833 enum machine_mode inmode;
3834 enum rtx_code code = GET_CODE (op);
3836 if (mode != VOIDmode && GET_MODE (op) != mode)
3838 if (GET_RTX_CLASS (code) != '<')
3840 inmode = GET_MODE (XEXP (op, 0));
3841 if (GET_CODE (XEXP (op, 0)) != REG
3842 || REGNO (XEXP (op, 0)) != 17
3843 || XEXP (op, 1) != const0_rtx)
3846 if (inmode == CCFPmode || inmode == CCFPUmode)
3848 enum rtx_code second_code, bypass_code;
3850 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3851 if (bypass_code != NIL || second_code != NIL)
3853 code = ix86_fp_compare_code_to_integer (code);
3855 else if (inmode != CCmode)
3860 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3863 fcmov_comparison_operator (op, mode)
3865 enum machine_mode mode;
3867 enum machine_mode inmode;
3868 enum rtx_code code = GET_CODE (op);
3870 if (mode != VOIDmode && GET_MODE (op) != mode)
3872 if (GET_RTX_CLASS (code) != '<')
3874 inmode = GET_MODE (XEXP (op, 0));
3875 if (inmode == CCFPmode || inmode == CCFPUmode)
3877 enum rtx_code second_code, bypass_code;
3879 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3880 if (bypass_code != NIL || second_code != NIL)
3882 code = ix86_fp_compare_code_to_integer (code);
3884 /* i387 supports just limited amount of conditional codes. */
3887 case LTU: case GTU: case LEU: case GEU:
3888 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3891 case ORDERED: case UNORDERED:
3899 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3902 promotable_binary_operator (op, mode)
3904 enum machine_mode mode ATTRIBUTE_UNUSED;
3906 switch (GET_CODE (op))
3909 /* Modern CPUs have same latency for HImode and SImode multiply,
3910 but 386 and 486 do HImode multiply faster. */
3911 return ix86_cpu > PROCESSOR_I486;
3923 /* Nearly general operand, but accept any const_double, since we wish
3924 to be able to drop them into memory rather than have them get pulled
3928 cmp_fp_expander_operand (op, mode)
3930 enum machine_mode mode;
3932 if (mode != VOIDmode && mode != GET_MODE (op))
3934 if (GET_CODE (op) == CONST_DOUBLE)
3936 return general_operand (op, mode);
3939 /* Match an SI or HImode register for a zero_extract. */
3942 ext_register_operand (op, mode)
3944 enum machine_mode mode ATTRIBUTE_UNUSED;
3947 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3948 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3951 if (!register_operand (op, VOIDmode))
3954 /* Be careful to accept only registers having upper parts. */
3955 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3956 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3959 /* Return 1 if this is a valid binary floating-point operation.
3960 OP is the expression matched, and MODE is its mode. */
3963 binary_fp_operator (op, mode)
3965 enum machine_mode mode;
3967 if (mode != VOIDmode && mode != GET_MODE (op))
3970 switch (GET_CODE (op))
3976 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
3984 mult_operator (op, mode)
3986 enum machine_mode mode ATTRIBUTE_UNUSED;
3988 return GET_CODE (op) == MULT;
3992 div_operator (op, mode)
3994 enum machine_mode mode ATTRIBUTE_UNUSED;
3996 return GET_CODE (op) == DIV;
4000 arith_or_logical_operator (op, mode)
4002 enum machine_mode mode;
4004 return ((mode == VOIDmode || GET_MODE (op) == mode)
4005 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
4006 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
4009 /* Returns 1 if OP is memory operand with a displacement. */
4012 memory_displacement_operand (op, mode)
4014 enum machine_mode mode;
4016 struct ix86_address parts;
4018 if (! memory_operand (op, mode))
4021 if (! ix86_decompose_address (XEXP (op, 0), &parts))
4024 return parts.disp != NULL_RTX;
4027 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
4028 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4030 ??? It seems likely that this will only work because cmpsi is an
4031 expander, and no actual insns use this. */
4034 cmpsi_operand (op, mode)
4036 enum machine_mode mode;
4038 if (nonimmediate_operand (op, mode))
4041 if (GET_CODE (op) == AND
4042 && GET_MODE (op) == SImode
4043 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
4044 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
4045 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
4046 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
4047 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
4048 && GET_CODE (XEXP (op, 1)) == CONST_INT)
4054 /* Returns 1 if OP is memory operand that can not be represented by the
4058 long_memory_operand (op, mode)
4060 enum machine_mode mode;
4062 if (! memory_operand (op, mode))
4065 return memory_address_length (op) != 0;
4068 /* Return nonzero if the rtx is known aligned. */
4071 aligned_operand (op, mode)
4073 enum machine_mode mode;
4075 struct ix86_address parts;
4077 if (!general_operand (op, mode))
4080 /* Registers and immediate operands are always "aligned". */
4081 if (GET_CODE (op) != MEM)
4084 /* Don't even try to do any aligned optimizations with volatiles. */
4085 if (MEM_VOLATILE_P (op))
4090 /* Pushes and pops are only valid on the stack pointer. */
4091 if (GET_CODE (op) == PRE_DEC
4092 || GET_CODE (op) == POST_INC)
4095 /* Decode the address. */
4096 if (! ix86_decompose_address (op, &parts))
4099 if (parts.base && GET_CODE (parts.base) == SUBREG)
4100 parts.base = SUBREG_REG (parts.base);
4101 if (parts.index && GET_CODE (parts.index) == SUBREG)
4102 parts.index = SUBREG_REG (parts.index);
4104 /* Look for some component that isn't known to be aligned. */
4108 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
4113 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
4118 if (GET_CODE (parts.disp) != CONST_INT
4119 || (INTVAL (parts.disp) & 3) != 0)
4123 /* Didn't find one -- this must be an aligned address. */
4127 /* Return true if the constant is something that can be loaded with
4128 a special instruction. Only handle 0.0 and 1.0; others are less
4132 standard_80387_constant_p (x)
4135 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4137 /* Note that on the 80387, other constants, such as pi, that we should support
4138 too. On some machines, these are much slower to load as standard constant,
4139 than to load from doubles in memory. */
4140 if (x == CONST0_RTX (GET_MODE (x)))
4142 if (x == CONST1_RTX (GET_MODE (x)))
4147 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4150 standard_sse_constant_p (x)
4153 if (x == const0_rtx)
4155 return (x == CONST0_RTX (GET_MODE (x)));
4158 /* Returns 1 if OP contains a symbol reference */
4161 symbolic_reference_mentioned_p (op)
4164 register const char *fmt;
4167 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4170 fmt = GET_RTX_FORMAT (GET_CODE (op));
4171 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4177 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4178 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4182 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4189 /* Return 1 if it is appropriate to emit `ret' instructions in the
4190 body of a function. Do this only if the epilogue is simple, needing a
4191 couple of insns. Prior to reloading, we can't tell how many registers
4192 must be saved, so return 0 then. Return 0 if there is no frame
4193 marker to de-allocate.
4195 If NON_SAVING_SETJMP is defined and true, then it is not possible
4196 for the epilogue to be simple, so return 0. This is a special case
4197 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4198 until final, but jump_optimize may need to know sooner if a
4202 ix86_can_use_return_insn_p ()
4204 struct ix86_frame frame;
4206 #ifdef NON_SAVING_SETJMP
4207 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4211 if (! reload_completed || frame_pointer_needed)
4214 /* Don't allow more than 32 pop, since that's all we can do
4215 with one instruction. */
4216 if (current_function_pops_args
4217 && current_function_args_size >= 32768)
4220 ix86_compute_frame_layout (&frame);
4221 return frame.to_allocate == 0 && frame.nregs == 0;
4224 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
4226 x86_64_sign_extended_value (value)
4229 switch (GET_CODE (value))
4231 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
4232 to be at least 32 and this all acceptable constants are
4233 represented as CONST_INT. */
4235 if (HOST_BITS_PER_WIDE_INT == 32)
4239 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4240 return trunc_int_for_mode (val, SImode) == val;
4244 /* For certain code models, the symbolic references are known to fit.
4245 in CM_SMALL_PIC model we know it fits if it is local to the shared
4246 library. Don't count TLS SYMBOL_REFs here, since they should fit
4247 only if inside of UNSPEC handled below. */
4249 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4251 /* For certain code models, the code is near as well. */
4253 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4254 || ix86_cmodel == CM_KERNEL);
4256 /* We also may accept the offsetted memory references in certain special
4259 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4260 switch (XINT (XEXP (value, 0), 1))
4262 case UNSPEC_GOTPCREL:
4264 case UNSPEC_GOTNTPOFF:
4270 if (GET_CODE (XEXP (value, 0)) == PLUS)
4272 rtx op1 = XEXP (XEXP (value, 0), 0);
4273 rtx op2 = XEXP (XEXP (value, 0), 1);
4274 HOST_WIDE_INT offset;
4276 if (ix86_cmodel == CM_LARGE)
4278 if (GET_CODE (op2) != CONST_INT)
4280 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4281 switch (GET_CODE (op1))
4284 /* For CM_SMALL assume that latest object is 16MB before
4285 end of 31bits boundary. We may also accept pretty
4286 large negative constants knowing that all objects are
4287 in the positive half of address space. */
4288 if (ix86_cmodel == CM_SMALL
4289 && offset < 16*1024*1024
4290 && trunc_int_for_mode (offset, SImode) == offset)
4292 /* For CM_KERNEL we know that all object resist in the
4293 negative half of 32bits address space. We may not
4294 accept negative offsets, since they may be just off
4295 and we may accept pretty large positive ones. */
4296 if (ix86_cmodel == CM_KERNEL
4298 && trunc_int_for_mode (offset, SImode) == offset)
4302 /* These conditions are similar to SYMBOL_REF ones, just the
4303 constraints for code models differ. */
4304 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4305 && offset < 16*1024*1024
4306 && trunc_int_for_mode (offset, SImode) == offset)
4308 if (ix86_cmodel == CM_KERNEL
4310 && trunc_int_for_mode (offset, SImode) == offset)
4314 switch (XINT (op1, 1))
4319 && trunc_int_for_mode (offset, SImode) == offset)
4333 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
4335 x86_64_zero_extended_value (value)
4338 switch (GET_CODE (value))
4341 if (HOST_BITS_PER_WIDE_INT == 32)
4342 return (GET_MODE (value) == VOIDmode
4343 && !CONST_DOUBLE_HIGH (value));
4347 if (HOST_BITS_PER_WIDE_INT == 32)
4348 return INTVAL (value) >= 0;
4350 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4353 /* For certain code models, the symbolic references are known to fit. */
4355 return ix86_cmodel == CM_SMALL;
4357 /* For certain code models, the code is near as well. */
4359 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4361 /* We also may accept the offsetted memory references in certain special
4364 if (GET_CODE (XEXP (value, 0)) == PLUS)
4366 rtx op1 = XEXP (XEXP (value, 0), 0);
4367 rtx op2 = XEXP (XEXP (value, 0), 1);
4369 if (ix86_cmodel == CM_LARGE)
4371 switch (GET_CODE (op1))
4375 /* For small code model we may accept pretty large positive
4376 offsets, since one bit is available for free. Negative
4377 offsets are limited by the size of NULL pointer area
4378 specified by the ABI. */
4379 if (ix86_cmodel == CM_SMALL
4380 && GET_CODE (op2) == CONST_INT
4381 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4382 && (trunc_int_for_mode (INTVAL (op2), SImode)
4385 /* ??? For the kernel, we may accept adjustment of
4386 -0x10000000, since we know that it will just convert
4387 negative address space to positive, but perhaps this
4388 is not worthwhile. */
4391 /* These conditions are similar to SYMBOL_REF ones, just the
4392 constraints for code models differ. */
4393 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4394 && GET_CODE (op2) == CONST_INT
4395 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4396 && (trunc_int_for_mode (INTVAL (op2), SImode)
4410 /* Value should be nonzero if functions must have frame pointers.
4411 Zero means the frame pointer need not be set up (and parms may
4412 be accessed via the stack pointer) in functions that seem suitable. */
4415 ix86_frame_pointer_required ()
4417 /* If we accessed previous frames, then the generated code expects
4418 to be able to access the saved ebp value in our frame. */
4419 if (cfun->machine->accesses_prev_frame)
4422 /* Several x86 os'es need a frame pointer for other reasons,
4423 usually pertaining to setjmp. */
4424 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4427 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4428 the frame pointer by default. Turn it back on now if we've not
4429 got a leaf function. */
4430 if (TARGET_OMIT_LEAF_FRAME_POINTER
4431 && (!current_function_is_leaf))
4434 if (current_function_profile)
4440 /* Record that the current function accesses previous call frames. */
4443 ix86_setup_frame_addresses ()
4445 cfun->machine->accesses_prev_frame = 1;
4448 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4449 # define USE_HIDDEN_LINKONCE 1
4451 # define USE_HIDDEN_LINKONCE 0
4454 static int pic_labels_used;
4456 /* Fills in the label name that should be used for a pc thunk for
4457 the given register. */
4460 get_pc_thunk_name (name, regno)
4464 if (USE_HIDDEN_LINKONCE)
4465 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4467 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4471 /* This function generates code for -fpic that loads %ebx with
4472 the return address of the caller and then returns. */
4475 ix86_asm_file_end (file)
4481 for (regno = 0; regno < 8; ++regno)
4485 if (! ((pic_labels_used >> regno) & 1))
4488 get_pc_thunk_name (name, regno);
4490 if (USE_HIDDEN_LINKONCE)
4494 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4496 TREE_PUBLIC (decl) = 1;
4497 TREE_STATIC (decl) = 1;
4498 DECL_ONE_ONLY (decl) = 1;
4500 (*targetm.asm_out.unique_section) (decl, 0);
4501 named_section (decl, NULL, 0);
4503 (*targetm.asm_out.globalize_label) (file, name);
4504 fputs ("\t.hidden\t", file);
4505 assemble_name (file, name);
4507 ASM_DECLARE_FUNCTION_NAME (file, name, decl);
4512 ASM_OUTPUT_LABEL (file, name);
4515 xops[0] = gen_rtx_REG (SImode, regno);
4516 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4517 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4518 output_asm_insn ("ret", xops);
4522 /* Emit code for the SET_GOT patterns. */
4525 output_set_got (dest)
4531 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4533 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4535 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4538 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4540 output_asm_insn ("call\t%a2", xops);
4543 /* Output the "canonical" label name ("Lxx$pb") here too. This
4544 is what will be referred to by the Mach-O PIC subsystem. */
4545 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4547 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4548 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4551 output_asm_insn ("pop{l}\t%0", xops);
4556 get_pc_thunk_name (name, REGNO (dest));
4557 pic_labels_used |= 1 << REGNO (dest);
4559 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4560 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4561 output_asm_insn ("call\t%X2", xops);
4564 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4565 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4566 else if (!TARGET_MACHO)
4567 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4572 /* Generate an "push" pattern for input ARG. */
4578 return gen_rtx_SET (VOIDmode,
4580 gen_rtx_PRE_DEC (Pmode,
4581 stack_pointer_rtx)),
4585 /* Return >= 0 if there is an unused call-clobbered register available
4586 for the entire function. */
4589 ix86_select_alt_pic_regnum ()
4591 if (current_function_is_leaf && !current_function_profile)
4594 for (i = 2; i >= 0; --i)
4595 if (!regs_ever_live[i])
4599 return INVALID_REGNUM;
4602 /* Return 1 if we need to save REGNO. */
4604 ix86_save_reg (regno, maybe_eh_return)
4606 int maybe_eh_return;
4608 if (pic_offset_table_rtx
4609 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4610 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4611 || current_function_profile
4612 || current_function_calls_eh_return))
4614 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4619 if (current_function_calls_eh_return && maybe_eh_return)
4624 unsigned test = EH_RETURN_DATA_REGNO (i);
4625 if (test == INVALID_REGNUM)
4632 return (regs_ever_live[regno]
4633 && !call_used_regs[regno]
4634 && !fixed_regs[regno]
4635 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4638 /* Return number of registers to be saved on the stack. */
4646 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4647 if (ix86_save_reg (regno, true))
4652 /* Return the offset between two registers, one to be eliminated, and the other
4653 its replacement, at the start of a routine. */
4656 ix86_initial_elimination_offset (from, to)
4660 struct ix86_frame frame;
4661 ix86_compute_frame_layout (&frame);
4663 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4664 return frame.hard_frame_pointer_offset;
4665 else if (from == FRAME_POINTER_REGNUM
4666 && to == HARD_FRAME_POINTER_REGNUM)
4667 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4670 if (to != STACK_POINTER_REGNUM)
4672 else if (from == ARG_POINTER_REGNUM)
4673 return frame.stack_pointer_offset;
4674 else if (from != FRAME_POINTER_REGNUM)
4677 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4681 /* Fill structure ix86_frame about frame of currently computed function.  */
/* NOTE(review): this excerpt elides interior lines (the embedded original
   line numbers jump); comments below describe only what is visible.  */
4684 ix86_compute_frame_layout (frame)
4685      struct ix86_frame *frame;
4687   HOST_WIDE_INT total_size;
4688   int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4690   int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4691   HOST_WIDE_INT size = get_frame_size ();
4693   frame->nregs = ix86_nsaved_regs ();
4696   /* Skip return address and saved base pointer.  */
4697   offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4699   frame->hard_frame_pointer_offset = offset;
4701   /* Do some sanity checking of stack_alignment_needed and
4702      preferred_alignment, since the i386 port is the only one using these
4703      features, and they may break easily.  */
4705   if (size && !stack_alignment_needed)
4707   if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4709   if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4711   if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
  /* Clamp the needed alignment up to the hard minimum the ABI requires.  */
4714   if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4715     stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4717   /* Register save area */
4718   offset += frame->nregs * UNITS_PER_WORD;
  /* Reserve space for the x86-64 varargs register-save area when needed.  */
4721   if (ix86_save_varrargs_registers)
4723       offset += X86_64_VARARGS_SIZE;
4724       frame->va_arg_size = X86_64_VARARGS_SIZE;
4727     frame->va_arg_size = 0;
4729   /* Align start of frame for local function.  */
4730   frame->padding1 = ((offset + stack_alignment_needed - 1)
4731 		     & -stack_alignment_needed) - offset;
4733   offset += frame->padding1;
4735   /* Frame pointer points here.  */
4736   frame->frame_pointer_offset = offset;
4740   /* Add outgoing arguments area.  Can be skipped if we eliminated
4741      all the function calls as dead code.  */
4742   if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4744       offset += current_function_outgoing_args_size;
4745       frame->outgoing_arguments_size = current_function_outgoing_args_size;
4748     frame->outgoing_arguments_size = 0;
4750   /* Align stack boundary.  Only needed if we're calling another function
4752   if (!current_function_is_leaf || current_function_calls_alloca)
4753     frame->padding2 = ((offset + preferred_alignment - 1)
4754 		       & -preferred_alignment) - offset;
4756     frame->padding2 = 0;
4758   offset += frame->padding2;
4760   /* We've reached end of stack frame.  */
4761   frame->stack_pointer_offset = offset;
4763   /* Size prologue needs to allocate.  */
4764   frame->to_allocate =
4765     (size + frame->padding1 + frame->padding2
4766      + frame->outgoing_arguments_size + frame->va_arg_size);
  /* A leaf function with an unchanging stack pointer may use the x86-64
     red zone instead of explicitly allocating; cap it at the usable size.  */
4768   if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4769       && current_function_is_leaf)
4771       frame->red_zone_size = frame->to_allocate;
4772       if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4773 	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4776     frame->red_zone_size = 0;
4777   frame->to_allocate -= frame->red_zone_size;
4778   frame->stack_pointer_offset -= frame->red_zone_size;
  /* Debug dump of the computed layout; the guard condition appears elided
     in this excerpt.  NOTE(review): "%i" with HOST_WIDE_INT `size' looks
     like a format mismatch on LP64 hosts — confirm against the full file.  */
4780   fprintf (stderr, "nregs: %i\n", frame->nregs);
4781   fprintf (stderr, "size: %i\n", size);
4782   fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4783   fprintf (stderr, "padding1: %i\n", frame->padding1);
4784   fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4785   fprintf (stderr, "padding2: %i\n", frame->padding2);
4786   fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4787   fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4788   fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4789   fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4790 	   frame->hard_frame_pointer_offset);
4791   fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4795 /* Emit code to save registers in the prologue.  */
/* Pushes every call-saved register that ix86_save_reg says must be saved.
   Registers are scanned from high to low so the matching pops in the
   epilogue restore them in the opposite order; each push is marked
   RTX_FRAME_RELATED_P so unwind/CFI info is generated for it.  */
4798 ix86_emit_save_regs ()
4803   for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4804     if (ix86_save_reg (regno, true))
4806 	insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4807 	RTX_FRAME_RELATED_P (insn) = 1;
4811 /* Emit code to save registers using MOV insns.  First register
4812    is stored at POINTER + OFFSET.  */
/* Alternative to push-based saving: stores each to-be-saved register with a
   plain move at increasing word offsets from POINTER.  Used by the "fast"
   prologue, where moves schedule better than pushes.  */
4814 ix86_emit_save_regs_using_mov (pointer, offset)
4816      HOST_WIDE_INT offset;
4821   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4822     if (ix86_save_reg (regno, true))
4824 	insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4826 			       gen_rtx_REG (Pmode, regno));
4827 	RTX_FRAME_RELATED_P (insn) = 1;
4828 	offset += UNITS_PER_WORD;
4832 /* Expand the prologue into a bunch of separate insns.  */
/* NOTE(review): interior lines are elided in this excerpt; several guards
   and braces are not visible.  Comments describe only what is shown.  */
4835 ix86_expand_prologue ()
4839   struct ix86_frame frame;
4841   HOST_WIDE_INT allocate;
4843   ix86_compute_frame_layout (&frame);
4846       int count = frame.nregs;
4848       /* The fast prologue uses move instead of push to save registers.  This
4849          is significantly longer, but also executes faster as modern hardware
4850          can execute the moves in parallel, but can't do that for push/pop.
4852 	 Be careful about choosing what prologue to emit: When function takes
4853 	 many instructions to execute we may use slow version as well as in
4854 	 case function is known to be outside hot spot (this is known with
4855 	 feedback only).  Weight the size of function by number of registers
4856 	 to save as it is cheap to use one or two push instructions but very
4857 	 slow to use many of them.  */
4859 	count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4860       if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4861 	  || (flag_branch_probabilities
4862 	      && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4863         use_fast_prologue_epilogue = 0;
4865         use_fast_prologue_epilogue = !expensive_function_p (count);
4866       if (TARGET_PROLOGUE_USING_MOVE)
4867         use_mov = use_fast_prologue_epilogue;
4870   /* Note: AT&T enter does NOT have reversed args.  Enter is probably
4871      slower on all targets.  Also sdb doesn't like it.  */
4873   if (frame_pointer_needed)
  /* Classic frame setup: push %ebp; mov %esp, %ebp — both frame-related.  */
4875       insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4876       RTX_FRAME_RELATED_P (insn) = 1;
4878       insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4879       RTX_FRAME_RELATED_P (insn) = 1;
4882   allocate = frame.to_allocate;
4883   /* In case we are dealing only with single register and empty frame,
4884      push is equivalent of the mov+add sequence.  */
4885   if (allocate == 0 && frame.nregs <= 1)
4889       ix86_emit_save_regs ();
  /* When saving with moves the register area becomes part of the single
     stack-pointer adjustment below.  */
4891     allocate += frame.nregs * UNITS_PER_WORD;
4895   else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4897       insn = emit_insn (gen_pro_epilogue_adjust_stack
4898 			(stack_pointer_rtx, stack_pointer_rtx,
4899 			 GEN_INT (-allocate)));
4900       RTX_FRAME_RELATED_P (insn) = 1;
4904       /* ??? Is this only valid for Win32?  */
  /* Large allocation with stack probing: pass the size in %eax and call
     the runtime `_alloca' helper instead of adjusting %esp directly.  */
4911       arg0 = gen_rtx_REG (SImode, 0);
4912       emit_move_insn (arg0, GEN_INT (allocate));
4914       sym = gen_rtx_MEM (FUNCTION_MODE,
4915 			 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
4916       insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
4918       CALL_INSN_FUNCTION_USAGE (insn)
4919 	= gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4920 			     CALL_INSN_FUNCTION_USAGE (insn));
4922       /* Don't allow scheduling pass to move insns across __alloca
4924       emit_insn (gen_blockage (const0_rtx));
  /* Move-based register saves: address them from %esp when the frame
     pointer is unavailable or nothing was allocated, else from %ebp with
     negative offsets (the saves sit just below the saved %ebp).  */
4928       if (!frame_pointer_needed || !frame.to_allocate)
4929         ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4931         ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4932 				       -frame.nregs * UNITS_PER_WORD);
4935 #ifdef SUBTARGET_PROLOGUE
  /* Load the PIC register if this function references the GOT (or profiles);
     an alternate call-clobbered register is used when one is free.  */
4939   pic_reg_used = false;
4940   if (pic_offset_table_rtx
4941       && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4942 	  || current_function_profile))
4944       unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4946       if (alt_pic_reg_used != INVALID_REGNUM)
4947 	REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4949       pic_reg_used = true;
4954       insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4956       /* Even with accurate pre-reload life analysis, we can wind up
4957 	 deleting all references to the pic register after reload.
4958 	 Consider if cross-jumping unifies two sides of a branch
4959 	 controlled by a comparison vs the only read from a global.
4960 	 In which case, allow the set_got to be deleted, though we're
4961 	 too late to do anything about the ebx save in the prologue.  */
4962       REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4965   /* Prevent function calls from be scheduled before the call to mcount.
4966      In the pic_reg_used case, make sure that the got load isn't deleted.  */
4967   if (current_function_profile)
4968     emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
4971 /* Emit code to restore saved registers using MOV insns.  First register
4972    is restored from POINTER + OFFSET.  */
/* Mirror of ix86_emit_save_regs_using_mov.  MAYBE_EH_RETURN is forwarded
   to ix86_save_reg so the eh_return data registers are (or are not)
   included, matching how they were saved.  */
4974 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4977      int maybe_eh_return;
4981   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4982     if (ix86_save_reg (regno, maybe_eh_return))
4984 	emit_move_insn (gen_rtx_REG (Pmode, regno),
4985 			adjust_address (gen_rtx_MEM (Pmode, pointer),
4987 	offset += UNITS_PER_WORD;
4991 /* Restore function stack, frame, and registers.  */
/* STYLE argument (visible below): style == 2 appears to select the
   eh_return path and style selects whether a return insn is emitted —
   NOTE(review): the full enum of STYLE values is elided here; confirm.  */
4994 ix86_expand_epilogue (style)
4998   int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4999   struct ix86_frame frame;
5000   HOST_WIDE_INT offset;
5002   ix86_compute_frame_layout (&frame);
5004   /* Calculate start of saved registers relative to ebp.  Special care
5005      must be taken for the normal return case of a function using
5006      eh_return: the eax and edx registers are marked as saved, but not
5007      restored along this path.  */
5008   offset = frame.nregs;
5009   if (current_function_calls_eh_return && style != 2)
5011   offset *= -UNITS_PER_WORD;
5013   /* If we're only restoring one register and sp is not valid then
5014      using a move instruction to restore the register since it's
5015      less work than reloading sp and popping the register.
5017      The default code result in stack adjustment using add/lea instruction,
5018      while this code results in LEAVE instruction (or discrete equivalent),
5019      so it is profitable in some other cases as well.  Especially when there
5020      are no registers to restore.  We also use this code when TARGET_USE_LEAVE
5021      and there is exactly one register to pop.  This heuristic may need some
5022      tuning in future.  */
5023   if ((!sp_valid && frame.nregs <= 1)
5024       || (TARGET_EPILOGUE_USING_MOVE
5025 	  && use_fast_prologue_epilogue
5026 	  && (frame.nregs > 1 || frame.to_allocate))
5027       || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5028       || (frame_pointer_needed && TARGET_USE_LEAVE
5029 	  && use_fast_prologue_epilogue && frame.nregs == 1)
5030       || current_function_calls_eh_return)
5032       /* Restore registers.  We can use ebp or esp to address the memory
5033 	 locations.  If both are available, default to ebp, since offsets
5034 	 are known to be small.  Only exception is esp pointing directly to the
5035 	 end of block of saved registers, where we may simplify addressing
5038       if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5039 	ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5040 					  frame.to_allocate, style == 2);
5042 	ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5043 					  offset, style == 2);
5045       /* eh_return epilogues need %ecx added to the stack pointer.  */
5048 	  rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5050 	  if (frame_pointer_needed)
  /* With a frame pointer: restore %ebp from its slot and set %esp to
     the saved-%ebp slot address plus the eh stack adjustment.  */
5052 	      tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5053 	      tmp = plus_constant (tmp, UNITS_PER_WORD);
5054 	      emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5056 	      tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5057 	      emit_move_insn (hard_frame_pointer_rtx, tmp);
5059 	      emit_insn (gen_pro_epilogue_adjust_stack
5060 			 (stack_pointer_rtx, sa, const0_rtx));
  /* Without a frame pointer: bump %esp past the frame and save area,
     plus the eh stack adjustment in SA.  */
5064 	      tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5065 	      tmp = plus_constant (tmp, (frame.to_allocate
5066 					 + frame.nregs * UNITS_PER_WORD));
5067 	      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5070       else if (!frame_pointer_needed)
5071 	emit_insn (gen_pro_epilogue_adjust_stack
5072 		   (stack_pointer_rtx, stack_pointer_rtx,
5073 		    GEN_INT (frame.to_allocate
5074 			     + frame.nregs * UNITS_PER_WORD)));
5075       /* If not an i386, mov & pop is faster than "leave".  */
5076       else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
5077 	emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5080 	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
5081 						    hard_frame_pointer_rtx,
5084 	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5086 	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
  /* Pop-based epilogue path (the branch's opening is elided above).  */
5091       /* First step is to deallocate the stack frame so that we can
5092 	 pop the registers.  */
5095       if (!frame_pointer_needed)
5097 	  emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
5098 						    hard_frame_pointer_rtx,
5101       else if (frame.to_allocate)
5102 	emit_insn (gen_pro_epilogue_adjust_stack
5103 		   (stack_pointer_rtx, stack_pointer_rtx,
5104 		    GEN_INT (frame.to_allocate)));
5106       for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5107 	if (ix86_save_reg (regno, false))
5110 	      emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5112 	      emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5114       if (frame_pointer_needed)
5116 	  /* Leave results in shorter dependency chains on CPUs that are
5117 	     able to grok it fast.  */
5118 	  if (TARGET_USE_LEAVE)
5119 	    emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5120 	  else if (TARGET_64BIT)
5121 	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5123 	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5127   /* Sibcall epilogues don't want a return instruction.  */
5131   if (current_function_pops_args && current_function_args_size)
5133       rtx popc = GEN_INT (current_function_pops_args);
5135       /* i386 can only pop 64K bytes.  If asked to pop more, pop
5136 	 return address, do explicit add, and jump indirectly to the
5139       if (current_function_pops_args >= 65536)
5141 	  rtx ecx = gen_rtx_REG (SImode, 2);
5143 	  /* There is no "pascal" calling convention in the 64-bit ABI.  */
5147 	  emit_insn (gen_popsi1 (ecx));
5148 	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5149 	  emit_jump_insn (gen_return_indirect_internal (ecx));
5152 	emit_jump_insn (gen_return_pop_internal (popc));
5155     emit_jump_insn (gen_return_internal ());
5158 /* Reset from the function's potential modifications.  */
/* After a function is output, restore the PIC pseudo's hard register
   number: the prologue may have retargeted pic_offset_table_rtx to an
   alternate register (see ix86_expand_prologue).  */
5161 ix86_output_function_epilogue (file, size)
5162      FILE *file ATTRIBUTE_UNUSED;
5163      HOST_WIDE_INT size ATTRIBUTE_UNUSED;
5165   if (pic_offset_table_rtx)
5166     REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5169 /* Extract the parts of an RTL expression that is a valid memory address
5170    for an instruction.  Return 0 if the structure of the address is
5171    grossly off.  Return -1 if the address contains ASHIFT, so it is not
5172    strictly valid, but still used for computing length of lea instruction.
/* Splits ADDR into base + index*scale + disp and stores the parts in
   *OUT.  NOTE(review): several early-return/abort lines are elided in
   this excerpt.  */
5176 ix86_decompose_address (addr, out)
5178      struct ix86_address *out;
5180   rtx base = NULL_RTX;
5181   rtx index = NULL_RTX;
5182   rtx disp = NULL_RTX;
5183   HOST_WIDE_INT scale = 1;
5184   rtx scale_rtx = NULL_RTX;
5187   if (REG_P (addr) || GET_CODE (addr) == SUBREG)
5189   else if (GET_CODE (addr) == PLUS)
5191       rtx op0 = XEXP (addr, 0);
5192       rtx op1 = XEXP (addr, 1);
5193       enum rtx_code code0 = GET_CODE (op0);
5194       enum rtx_code code1 = GET_CODE (op1);
5196       if (code0 == REG || code0 == SUBREG)
5198 	  if (code1 == REG || code1 == SUBREG)
5199 	    index = op0, base = op1;	/* index + base */
5201 	    base = op0, disp = op1;	/* base + displacement */
5203       else if (code0 == MULT)
5205 	  index = XEXP (op0, 0);
5206 	  scale_rtx = XEXP (op0, 1);
5207 	  if (code1 == REG || code1 == SUBREG)
5208 	    base = op1;		/* index*scale + base */
5210 	    disp = op1;		/* index*scale + disp */
5212       else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
5214 	  index = XEXP (XEXP (op0, 0), 0);	/* index*scale + base + disp */
5215 	  scale_rtx = XEXP (XEXP (op0, 0), 1);
5216 	  base = XEXP (op0, 1);
5219       else if (code0 == PLUS)
5221 	  index = XEXP (op0, 0);	/* index + base + disp */
5222 	  base = XEXP (op0, 1);
5228   else if (GET_CODE (addr) == MULT)
5230       index = XEXP (addr, 0);		/* index*scale */
5231       scale_rtx = XEXP (addr, 1);
5233   else if (GET_CODE (addr) == ASHIFT)
5237       /* We're called for lea too, which implements ashift on occasion.  */
5238       index = XEXP (addr, 0);
5239       tmp = XEXP (addr, 1);
5240       if (GET_CODE (tmp) != CONST_INT)
5242       scale = INTVAL (tmp);
  /* Shift count must encode a hardware scale of 1/2/4/8, i.e. 0..3.  */
5243       if ((unsigned HOST_WIDE_INT) scale > 3)
5249     disp = addr;			/* displacement */
5251   /* Extract the integral value of scale.  */
5254       if (GET_CODE (scale_rtx) != CONST_INT)
5256       scale = INTVAL (scale_rtx);
5259   /* Allow arg pointer and stack pointer as index if there is not scaling */
5260   if (base && index && scale == 1
5261       && (index == arg_pointer_rtx || index == frame_pointer_rtx
5262           || index == stack_pointer_rtx))
5269   /* Special case: %ebp cannot be encoded as a base without a displacement.  */
5270   if ((base == hard_frame_pointer_rtx
5271        || base == frame_pointer_rtx
5272        || base == arg_pointer_rtx) && !disp)
5275   /* Special case: on K6, [%esi] makes the instruction vector decoded.
5276      Avoid this by transforming to [%esi+0].  */
5277   if (ix86_cpu == PROCESSOR_K6 && !optimize_size
5278       && base && !index && !disp
5280       && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5283   /* Special case: encode reg+reg instead of reg*2.  */
5284   if (!base && index && scale && scale == 2)
5285     base = index, scale = 1;
5287   /* Special case: scaling cannot be encoded without base or displacement.  */
5288   if (!base && !disp && index && scale != 1)
5299 /* Return cost of the memory address x.
5300    For i386, it is better to use a complex address than let gcc copy
5301    the address into a reg and make a new pseudo.  But not if the address
5302    requires two regs - that would mean more pseudos with longer
/* Lower return value = cheaper address.  NOTE(review): the initial cost
   value and the adjustment amounts are on lines elided from this excerpt.  */
5305 ix86_address_cost (x)
5308   struct ix86_address parts;
5311   if (!ix86_decompose_address (x, &parts))
  /* Cost is judged on the underlying registers, not the SUBREG wrappers.  */
5314   if (parts.base && GET_CODE (parts.base) == SUBREG)
5315     parts.base = SUBREG_REG (parts.base);
5316   if (parts.index && GET_CODE (parts.index) == SUBREG)
5317     parts.index = SUBREG_REG (parts.index);
5319   /* More complex memory references are better.  */
5320   if (parts.disp && parts.disp != const0_rtx)
5323   /* Attempt to minimize number of registers in the address.  */
5325        && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5327 	  && (!REG_P (parts.index)
5328 	      || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5332       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5334 	  && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5335       && parts.base != parts.index)
5338   /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
5339      since its predecode logic can't detect the length of instructions
5340      and it degenerates to vector decoded.  Increase cost of such
5341      addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
5342      to split such addresses or even refuse such addresses at all.
5344      Following addressing modes are affected:
5349      The first and last case may be avoidable by explicitly coding the zero in
5350      memory address, but I don't have AMD-K6 machine handy to check this
5354       && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5355 	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5356 	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5362 /* If X is a machine specific address (i.e. a symbol or label being
5363    referenced as a displacement from the GOT implemented using an
5364    UNSPEC), then return the base term.  Otherwise return X.  */
5367 ix86_find_base_term (x)
  /* 64-bit path (guard elided here): look through CONST (UNSPEC_GOTPCREL
     [+ offset]) wrappers to the underlying SYMBOL_REF/LABEL_REF.  */
5374       if (GET_CODE (x) != CONST)
5377       if (GET_CODE (term) == PLUS
5378 	  && (GET_CODE (XEXP (term, 1)) == CONST_INT
5379 	      || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5380 	term = XEXP (term, 0);
5381       if (GET_CODE (term) != UNSPEC
5382 	  || XINT (term, 1) != UNSPEC_GOTPCREL)
5385       term = XVECEXP (term, 0, 0);
5387       if (GET_CODE (term) != SYMBOL_REF
5388 	  && GET_CODE (term) != LABEL_REF)
  /* 32-bit path: delegitimize the PIC address and accept only a plain
     symbol or label as the base term.  */
5394   term = ix86_delegitimize_address (x);
5396   if (GET_CODE (term) != SYMBOL_REF
5397       && GET_CODE (term) != LABEL_REF)
5403 /* Determine if a given RTX is a valid constant.  We already know this
5404    satisfies CONSTANT_P.  */
5407 legitimate_constant_p (x)
5412   switch (GET_CODE (x))
  /* SYMBOL_REF case (label elided): TLS symbols are not constant.  */
5415       /* TLS symbols are not constant.  */
5416       if (tls_symbolic_operand (x, Pmode))
5421       inner = XEXP (x, 0);
5423       /* Offsets of TLS symbols are never valid.
5424 	 Discourage CSE from creating them.  */
5425       if (GET_CODE (inner) == PLUS
5426 	  && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5429       /* Only some unspecs are valid as "constants".  */
5430       if (GET_CODE (inner) == UNSPEC)
5431 	switch (XINT (inner, 1))
5434 	    return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5444   /* Otherwise we handle everything else in the move patterns.  */
5448 /* Determine if it's legal to put X into the constant pool.  This
5449    is not possible for the address of thread-local symbols, which
5450    is checked above.  */
/* Target hook: a constant may be forced into memory exactly when it is a
   legitimate constant; everything legitimate_constant_p rejects (TLS
   references and disallowed unspecs) must stay out of the pool.  */
5453 ix86_cannot_force_const_mem (x)
5456   return !legitimate_constant_p (x);
5459 /* Determine if a given RTX is a valid constant address.  */
5462 constant_address_p (x)
5465   switch (GET_CODE (x))
  /* Case labels are elided here; visibly: some codes are constant
     addresses only on 64-bit, and the default is non-PIC legitimate
     constants only.  */
5472       return TARGET_64BIT;
5475       /* For Mach-O, really believe the CONST.  */
5478       /* Otherwise fall through.  */
5480       return !flag_pic && legitimate_constant_p (x);
5487 /* Nonzero if the constant value X is a legitimate general operand
5488    when generating PIC code.  It is given that flag_pic is on and
5489    that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */
5492 legitimate_pic_operand_p (x)
5497   switch (GET_CODE (x))
5500       inner = XEXP (x, 0);
5502       /* Only some unspecs are valid as "constants".  */
5503       if (GET_CODE (inner) == UNSPEC)
5504 	switch (XINT (inner, 1))
5507 	    return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
  /* SYMBOL_REF/LABEL_REF case (labels elided): defer to the
     displacement check used for PIC memory operands.  */
5515       return legitimate_pic_address_disp_p (x);
5522 /* Determine if a given CONST RTX is a valid memory displacement
5526 legitimate_pic_address_disp_p (disp)
5531   /* In 64bit mode we can allow direct addresses of symbols and labels
5532      when they are not dynamic symbols.  */
5535       /* TLS references should always be enclosed in UNSPEC.  */
5536       if (tls_symbolic_operand (disp, GET_MODE (disp)))
5538       if (GET_CODE (disp) == SYMBOL_REF
5539 	  && ix86_cmodel == CM_SMALL_PIC
5540 	  && (CONSTANT_POOL_ADDRESS_P (disp)
5541 	      || SYMBOL_REF_FLAG (disp)))
5543       if (GET_CODE (disp) == LABEL_REF)
  /* symbol/label + small offset: the offset must fit in +/-16MB so it
     stays within the small PIC code model's reach.  */
5545       if (GET_CODE (disp) == CONST
5546 	  && GET_CODE (XEXP (disp, 0)) == PLUS
5547 	  && ((GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
5548 	       && ix86_cmodel == CM_SMALL_PIC
5549 	       && (CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (disp, 0), 0))
5550 		   || SYMBOL_REF_FLAG (XEXP (XEXP (disp, 0), 0))))
5551 	      || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
5552 	  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT
5553 	  && INTVAL (XEXP (XEXP (disp, 0), 1)) < 16*1024*1024
5554 	  && INTVAL (XEXP (XEXP (disp, 0), 1)) >= -16*1024*1024)
5557       if (GET_CODE (disp) != CONST)
5559       disp = XEXP (disp, 0);
  /* 64-bit branch (guard elided): only a bare GOTPCREL unspec of a
     symbol or label is acceptable.  */
5563 	  /* We must not allow PLUS expressions here: doing so would exceed
5564 	     the permitted reach of GOT tables.  We should not need them anyway.  */
5565 	  if (GET_CODE (disp) != UNSPEC
5566 	      || XINT (disp, 1) != UNSPEC_GOTPCREL)
5569 	  if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5570 	      && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5576   if (GET_CODE (disp) == PLUS)
5578       if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5580       disp = XEXP (disp, 0);
5584   /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O.  */
5585   if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5587       if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5588           || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5589 	if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5591 	    const char *sym_name = XSTR (XEXP (disp, 1), 0);
5592 	    if (strstr (sym_name, "$pb") != 0)
5597   if (GET_CODE (disp) != UNSPEC)
  /* Validate the unspec kind against the symbol class it wraps.  */
5600   switch (XINT (disp, 1))
5605       return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5607       return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5608     case UNSPEC_GOTTPOFF:
5609     case UNSPEC_GOTNTPOFF:
5610     case UNSPEC_INDNTPOFF:
5613       return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5615       return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5617       return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5623 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5624    memory address for an instruction.  The MODE argument is the machine mode
5625    for the MEM expression that wants to use this address.
5627    It only recognizes address in canonical form.  LEGITIMIZE_ADDRESS should
5628    convert common non-canonical forms to canonical form so that they will
5632 legitimate_address_p (mode, addr, strict)
5633      enum machine_mode mode;
5637   struct ix86_address parts;
5638   rtx base, index, disp;
5639   HOST_WIDE_INT scale;
5640   const char *reason = NULL;
5641   rtx reason_rtx = NULL_RTX;
5643   if (TARGET_DEBUG_ADDR)
5646 	       "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5647 	       GET_MODE_NAME (mode), strict);
  /* The thread-pointer unspec is always a valid address by itself.  */
5651   if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
5653       if (TARGET_DEBUG_ADDR)
5654 	fprintf (stderr, "Success.\n");
5658   if (ix86_decompose_address (addr, &parts) <= 0)
5660       reason = "decomposition failed";
5665   index = parts.index;
5667   scale = parts.scale;
5669   /* Validate base register.
5671      Don't allow SUBREG's here, it can lead to spill failures when the base
5672      is one word out of a two word structure, which is represented internally
5680       if (GET_CODE (base) == SUBREG)
5681 	reg = SUBREG_REG (base);
5685       if (GET_CODE (reg) != REG)
5687 	  reason = "base is not a register";
5691       if (GET_MODE (base) != Pmode)
5693 	  reason = "base is not in Pmode";
5697       if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5698 	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5700 	  reason = "base is not valid";
5705   /* Validate index register.
5707      Don't allow SUBREG's here, it can lead to spill failures when the index
5708      is one word out of a two word structure, which is represented internally
5716       if (GET_CODE (index) == SUBREG)
5717 	reg = SUBREG_REG (index);
5721       if (GET_CODE (reg) != REG)
5723 	  reason = "index is not a register";
5727       if (GET_MODE (index) != Pmode)
5729 	  reason = "index is not in Pmode";
5733       if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5734 	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5736 	  reason = "index is not valid";
5741   /* Validate scale factor.  */
5744       reason_rtx = GEN_INT (scale);
5747 	  reason = "scale without index";
5751       if (scale != 2 && scale != 4 && scale != 8)
5753 	  reason = "scale is not a valid multiplier";
5758   /* Validate displacement.  */
5763       if (GET_CODE (disp) == CONST
5764 	  && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5765 	switch (XINT (XEXP (disp, 0), 1))
5769 	  case UNSPEC_GOTPCREL:
5772 	    goto is_legitimate_pic;
5774 	  case UNSPEC_GOTTPOFF:
5775 	  case UNSPEC_GOTNTPOFF:
5776 	  case UNSPEC_INDNTPOFF:
5782 	    reason = "invalid address unspec";
5786       else if (flag_pic && (SYMBOLIC_CONST (disp)
5788 			    && !machopic_operand_p (disp)
5793 	  if (TARGET_64BIT && (index || base))
5795 	      /* foo@dtpoff(%rX) is ok.  */
5796 	      if (GET_CODE (disp) != CONST
5797 		  || GET_CODE (XEXP (disp, 0)) != PLUS
5798 		  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5799 		  || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5800 		  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5801 		      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5803 		  reason = "non-constant pic memory reference";
5807 	  else if (! legitimate_pic_address_disp_p (disp))
5809 	      reason = "displacement is an invalid pic construct";
5813 	  /* This code used to verify that a symbolic pic displacement
5814 	     includes the pic_offset_table_rtx register.
5816 	     While this is good idea, unfortunately these constructs may
5817 	     be created by "adds using lea" optimization for incorrect
5826 	     This code is nonsensical, but results in addressing
5827 	     GOT table with pic_offset_table_rtx base.  We can't
5828 	     just refuse it easily, since it gets matched by
5829 	     "addsi3" pattern, that later gets split to lea in the
5830 	     case output register differs from input.  While this
5831 	     can be handled by separate addsi pattern for this case
5832 	     that never results in lea, this seems to be easier and
5833 	     correct fix for crash to disable this test.  */
5835       else if (!CONSTANT_ADDRESS_P (disp))
5837 	  reason = "displacement is not constant";
5840       else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
5842 	  reason = "displacement is out of range";
5845       else if (!TARGET_64BIT && GET_CODE (disp) == CONST_DOUBLE)
5847 	  reason = "displacement is a const_double";
5852   /* Everything looks valid.  */
5853   if (TARGET_DEBUG_ADDR)
5854     fprintf (stderr, "Success.\n");
  /* Rejection path (label elided): report the failure reason when the
     address-debug flag is set.  */
5858   if (TARGET_DEBUG_ADDR)
5860       fprintf (stderr, "Error: %s\n", reason);
5861       debug_rtx (reason_rtx);
5866 /* Return a unique alias set for the GOT.  */
5868 static HOST_WIDE_INT
5869 ix86_GOT_alias_set ()
5871   static HOST_WIDE_INT set = -1;
  /* NOTE(review): the once-only guard (presumably `if (set == -1)') is
     elided from this excerpt; as shown, the call would re-run each time.
     Confirm against the full file.  */
5873     set = new_alias_set ();
5877 /* Return a legitimate reference for ORIG (an address) using the
5878    register REG.  If REG is 0, a new pseudo is generated.
5880    There are two types of references that must be handled:
5882    1. Global data references must load the address from the GOT, via
5883       the PIC reg.  An insn is emitted to do this load, and the reg is
5886    2. Static data references, constant pool addresses, and code labels
5887       compute the address as an offset from the GOT, whose base is in
5888       the PIC reg.  Static data objects have SYMBOL_REF_FLAG set to
5889       differentiate them from global data objects.  The returned
5890       address is the PIC reg + an unspec constant.
5892    GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5893    reg also appears in the address.  */
5896 legitimize_pic_address (orig, reg)
5906 	reg = gen_reg_rtx (Pmode);
5907       /* Use the generic Mach-O PIC machinery.  */
5908       return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5911   if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
5913   else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
5915       /* This symbol may be referenced via a displacement from the PIC
5916 	 base address (@GOTOFF).  */
  /* During reload, mark the PIC register live so the GOTOFF reference
     keeps it available.  */
5918       if (reload_in_progress)
5919 	regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5920       new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5921       new = gen_rtx_CONST (Pmode, new);
5922       new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5926 	  emit_move_insn (reg, new);
5930   else if (GET_CODE (addr) == SYMBOL_REF
  /* 64-bit global symbol: RIP-relative GOT load (@GOTPCREL).  The slot
     never changes, so the MEM is marked unchanging and given the GOT
     alias set.  */
5934 	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5935 	  new = gen_rtx_CONST (Pmode, new);
5936 	  new = gen_rtx_MEM (Pmode, new);
5937 	  RTX_UNCHANGING_P (new) = 1;
5938 	  set_mem_alias_set (new, ix86_GOT_alias_set ());
5941 	    reg = gen_reg_rtx (Pmode);
5942 	  /* Use directly gen_movsi, otherwise the address is loaded
5943 	     into register for CSE.  We don't want to CSE this addresses,
5944 	     instead we CSE addresses from the GOT table, so skip this.  */
5945 	  emit_insn (gen_movsi (reg, new));
5950 	  /* This symbol must be referenced via a load from the
5951 	     Global Offset Table (@GOT).  */
5953 	  if (reload_in_progress)
5954 	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5955 	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5956 	  new = gen_rtx_CONST (Pmode, new);
5957 	  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5958 	  new = gen_rtx_MEM (Pmode, new);
5959 	  RTX_UNCHANGING_P (new) = 1;
5960 	  set_mem_alias_set (new, ix86_GOT_alias_set ());
5963 	    reg = gen_reg_rtx (Pmode);
5964 	  emit_move_insn (reg, new);
5970       if (GET_CODE (addr) == CONST)
5972 	  addr = XEXP (addr, 0);
5974 	  /* We must match stuff we generate before.  Assume the only
5975 	     unspecs that can get here are ours.  Not that we could do
5976 	     anything with them anyway...  */
5977 	  if (GET_CODE (addr) == UNSPEC
5978 	      || (GET_CODE (addr) == PLUS
5979 		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5981 	  if (GET_CODE (addr) != PLUS)
5984       if (GET_CODE (addr) == PLUS)
5986 	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5988 	  /* Check first to see if this is a constant offset from a @GOTOFF
5989 	     symbol reference.  */
5990 	  if (local_symbolic_operand (op0, Pmode)
5991 	      && GET_CODE (op1) == CONST_INT)
5995 		  if (reload_in_progress)
5996 		    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5997 		  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5999 		  new = gen_rtx_PLUS (Pmode, new, op1);
6000 		  new = gen_rtx_CONST (Pmode, new);
6001 		  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6005 		      emit_move_insn (reg, new);
  /* Offsets beyond +/-16MB do not fit the small-model displacement;
     force the offset into a register instead.  */
6011 		  if (INTVAL (op1) < -16*1024*1024
6012 		      || INTVAL (op1) >= 16*1024*1024)
6013 		    new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
  /* General PLUS: legitimize each operand recursively, then recombine,
     folding a constant term back in when possible.  */
6018 	      base = legitimize_pic_address (XEXP (addr, 0), reg);
6019 	      new  = legitimize_pic_address (XEXP (addr, 1),
6020 					     base == reg ? NULL_RTX : reg);
6022 	      if (GET_CODE (new) == CONST_INT)
6023 		new = plus_constant (base, INTVAL (new));
6026 		  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6028 		      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6029 		      new = XEXP (new, 1);
6031 		  new = gen_rtx_PLUS (Pmode, base, new);
/* Target hook: record section/visibility information on a decl's
   SYMBOL_REF.  Marks locally-binding symbols (SYMBOL_REF_FLAG) for direct
   PIC access, and prefixes TLS symbol names with a "%<model>" encoding.  */
6040 ix86_encode_section_info (decl, first)
6042      int first ATTRIBUTE_UNUSED;
6044   bool local_p = (*targetm.binds_local_p) (decl);
6047   rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
6048   if (GET_CODE (rtl) != MEM)
6050   symbol = XEXP (rtl, 0);
6051   if (GET_CODE (symbol) != SYMBOL_REF)
6054   /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
6055      symbol so that we may access it directly in the GOT.  */
6058     SYMBOL_REF_FLAG (symbol) = local_p;
6060   /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
6061      "local dynamic", "initial exec" or "local exec" TLS models
6064   if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
6066       const char *symbol_str;
6069       enum tls_model kind = decl_tls_model (decl);
6071       if (TARGET_64BIT && ! flag_pic)
6073 	  /* x86-64 doesn't allow non-pic code for shared libraries,
6074 	     so don't generate GD/LD TLS models for non-pic code.  */
6077 	    case TLS_MODEL_GLOBAL_DYNAMIC:
6078 	      kind = TLS_MODEL_INITIAL_EXEC; break;
6079 	    case TLS_MODEL_LOCAL_DYNAMIC:
6080 	      kind = TLS_MODEL_LOCAL_EXEC; break;
6086       symbol_str = XSTR (symbol, 0);
  /* Already encoded with the same model?  Nothing to do.  */
6088       if (symbol_str[0] == '%')
6090 	  if (symbol_str[1] == tls_model_chars[kind])
  /* Otherwise build "%<model-char><name>" and install it as the new,
     GC-allocated symbol name.  */
6094       len = strlen (symbol_str) + 1;
6095       newstr = alloca (len + 2);
6098       newstr[1] = tls_model_chars[kind];
6099       memcpy (newstr + 2, symbol_str, len);
6101       XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
6105 /* Undo the above when printing symbol names. */
6108 ix86_strip_name_encoding (str)
6118 /* Load the thread pointer into a register. */
6121 get_thread_pointer ()
/* Build a MEM wrapping (unspec [const0] UNSPEC_TP); the pattern for it
   reads the thread pointer from the TLS segment register.  */
6125 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6126 tp = gen_rtx_MEM (Pmode, tp);
6127 RTX_UNCHANGING_P (tp) = 1;  /* constant for the life of the thread */
6128 set_mem_alias_set (tp, ix86_GOT_alias_set ());
6129 tp = force_reg (Pmode, tp);
6134 /* Try machine-dependent ways of modifying an illegitimate address
6135 to be legitimate. If we find one, return the new, valid address.
6136 This macro is used in only one place: `memory_address' in explow.c.
6138 OLDX is the address as it was before break_out_memory_refs was called.
6139 In some cases it is useful to look at this to decide what needs to be done.
6141 MODE and WIN are passed so that this macro can use
6142 GO_IF_LEGITIMATE_ADDRESS.
6144 It is always safe for this macro to do nothing. It exists to recognize
6145 opportunities to optimize the output.
6147 For the 80386, we handle X+REG by loading X into a register R and
6148 using R+REG. R will go in a general reg and indexing will be used.
6149 However, if REG is a broken-out memory address or multiplication,
6150 nothing needs to be done because REG can certainly go in a general reg.
6152 When -fpic is used, special handling is needed for symbolic references.
6153 See comments by legitimize_pic_address in i386.c for details. */
/* Rewrite address X into a legitimate i386 address for MODE (see the
   block comment above).  Handles, in order: TLS symbols (expanded per
   their access model), PIC symbolic constants, canonicalization of
   shift-by-0..3 into multiply, and reassociation of PLUS trees so the
   MULT (index*scale) part comes first.  Returns the (possibly new)
   address rtx.  */
6156 legitimize_address (x, oldx, mode)
6158 register rtx oldx ATTRIBUTE_UNUSED;
6159 enum machine_mode mode;
6164 if (TARGET_DEBUG_ADDR)
6166 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6167 GET_MODE_NAME (mode));
/* TLS references are dispatched on their access model.  */
6171 log = tls_symbolic_operand (x, mode);
6174 rtx dest, base, off, pic;
6179 case TLS_MODEL_GLOBAL_DYNAMIC:
6180 dest = gen_reg_rtx (Pmode);
6183 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6186 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6187 insns = get_insns ();
6190 emit_libcall_block (insns, dest, rax, x);
6193 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6196 case TLS_MODEL_LOCAL_DYNAMIC:
6197 base = gen_reg_rtx (Pmode);
6200 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6203 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6204 insns = get_insns ();
6207 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6208 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6209 emit_libcall_block (insns, base, rax, note);
6212 emit_insn (gen_tls_local_dynamic_base_32 (base));
/* LD: address = module base + DTP-relative offset of the symbol.  */
6214 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6215 off = gen_rtx_CONST (Pmode, off);
6217 return gen_rtx_PLUS (Pmode, base, off);
6219 case TLS_MODEL_INITIAL_EXEC:
6223 type = UNSPEC_GOTNTPOFF;
6227 if (reload_in_progress)
6228 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6229 pic = pic_offset_table_rtx;
6230 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6232 else if (!TARGET_GNU_TLS)
6234 pic = gen_reg_rtx (Pmode);
6235 emit_insn (gen_set_got (pic));
6236 type = UNSPEC_GOTTPOFF;
6241 type = UNSPEC_INDNTPOFF;
/* IE: load the TP-relative offset from the GOT, then add the thread
   pointer (GNU TLS / 64-bit) or subtract from it (Sun TLS).  */
6244 base = get_thread_pointer ();
6246 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6247 off = gen_rtx_CONST (Pmode, off);
6249 off = gen_rtx_PLUS (Pmode, pic, off);
6250 off = gen_rtx_MEM (Pmode, off);
6251 RTX_UNCHANGING_P (off) = 1;
6252 set_mem_alias_set (off, ix86_GOT_alias_set ());
6253 dest = gen_reg_rtx (Pmode);
6255 if (TARGET_64BIT || TARGET_GNU_TLS)
6257 emit_move_insn (dest, off);
6258 return gen_rtx_PLUS (Pmode, base, dest);
6261 emit_insn (gen_subsi3 (dest, base, off));
6264 case TLS_MODEL_LOCAL_EXEC:
6265 base = get_thread_pointer ();
6267 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6268 (TARGET_64BIT || TARGET_GNU_TLS)
6269 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6270 off = gen_rtx_CONST (Pmode, off);
6272 if (TARGET_64BIT || TARGET_GNU_TLS)
6273 return gen_rtx_PLUS (Pmode, base, off);
6276 dest = gen_reg_rtx (Pmode);
6277 emit_insn (gen_subsi3 (dest, base, off));
6288 if (flag_pic && SYMBOLIC_CONST (x))
6289 return legitimize_pic_address (x, 0);
6291 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6292 if (GET_CODE (x) == ASHIFT
6293 && GET_CODE (XEXP (x, 1)) == CONST_INT
6294 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6297 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6298 GEN_INT (1 << log));
6301 if (GET_CODE (x) == PLUS)
6303 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6305 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6306 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6307 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6310 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6311 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6312 GEN_INT (1 << log));
6315 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6316 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6317 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6320 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6321 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6322 GEN_INT (1 << log));
6325 /* Put multiply first if it isn't already. */
6326 if (GET_CODE (XEXP (x, 1)) == MULT)
6328 rtx tmp = XEXP (x, 0);
6329 XEXP (x, 0) = XEXP (x, 1);
6334 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6335 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6336 created by virtual register instantiation, register elimination, and
6337 similar optimizations. */
6338 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6341 x = gen_rtx_PLUS (Pmode,
6342 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6343 XEXP (XEXP (x, 1), 0)),
6344 XEXP (XEXP (x, 1), 1));
6348 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6349 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6350 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6351 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6352 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6353 && CONSTANT_P (XEXP (x, 1)))
6356 rtx other = NULL_RTX;
/* Exactly one of the two constants must be a CONST_INT; fold it into
   the other constant via plus_constant below.  */
6358 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6360 constant = XEXP (x, 1);
6361 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6363 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6365 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6366 other = XEXP (x, 1);
6374 x = gen_rtx_PLUS (Pmode,
6375 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6376 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6377 plus_constant (other, INTVAL (constant)));
6381 if (changed && legitimate_address_p (mode, x, FALSE))
/* Force MULT subterms (index*scale) into registers when the whole
   address is still not legitimate.  */
6384 if (GET_CODE (XEXP (x, 0)) == MULT)
6387 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6390 if (GET_CODE (XEXP (x, 1)) == MULT)
6393 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6397 && GET_CODE (XEXP (x, 1)) == REG
6398 && GET_CODE (XEXP (x, 0)) == REG)
6401 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6404 x = legitimize_pic_address (x, 0);
6407 if (changed && legitimate_address_p (mode, x, FALSE))
/* Last resort: compute one side into a fresh register and keep the
   other as base, yielding a reg+reg address.  */
6410 if (GET_CODE (XEXP (x, 0)) == REG)
6412 register rtx temp = gen_reg_rtx (Pmode);
6413 register rtx val = force_operand (XEXP (x, 1), temp);
6415 emit_move_insn (temp, val);
6421 else if (GET_CODE (XEXP (x, 1)) == REG)
6423 register rtx temp = gen_reg_rtx (Pmode);
6424 register rtx val = force_operand (XEXP (x, 0), temp);
6426 emit_move_insn (temp, val);
6436 /* Print an integer constant expression in assembler syntax. Addition
6437 and subtraction are the only arithmetic that may appear in these
6438 expressions. FILE is the stdio stream to write to, X is the rtx, and
6439 CODE is the operand print code from the output string. */
6442 output_pic_addr_const (file, x, code)
/* Recursive descent over the constant expression; each UNSPEC leaf is
   rendered as its assembler relocation suffix (@GOT, @GOTOFF, ...).  */
6449 switch (GET_CODE (x))
6459 assemble_name (file, XSTR (x, 0));
/* 'P': local (SYMBOL_REF_FLAG) symbols need no @PLT indirection.  */
6460 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_FLAG (x))
6461 fputs ("@PLT", file);
6468 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6469 assemble_name (asm_out_file, buf);
6473 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6477 /* This used to output parentheses around the expression,
6478 but that does not work on the 386 (either ATT or BSD assembler). */
6479 output_pic_addr_const (file, XEXP (x, 0), code);
6483 if (GET_MODE (x) == VOIDmode)
6485 /* We can use %d if the number is <32 bits and positive. */
6486 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6487 fprintf (file, "0x%lx%08lx",
6488 (unsigned long) CONST_DOUBLE_HIGH (x),
6489 (unsigned long) CONST_DOUBLE_LOW (x));
6491 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6494 /* We can't handle floating point constants;
6495 PRINT_OPERAND must handle them. */
6496 output_operand_lossage ("floating constant misused");
6500 /* Some assemblers need integer constants to appear first. */
6501 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6503 output_pic_addr_const (file, XEXP (x, 0), code);
6505 output_pic_addr_const (file, XEXP (x, 1), code);
6507 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6509 output_pic_addr_const (file, XEXP (x, 1), code);
6511 output_pic_addr_const (file, XEXP (x, 0), code);
/* MINUS: bracket the subtrahend; Intel dialect uses parentheses,
   AT&T uses square brackets.  */
6519 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6520 output_pic_addr_const (file, XEXP (x, 0), code);
6522 output_pic_addr_const (file, XEXP (x, 1), code);
6524 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6528 if (XVECLEN (x, 0) != 1)
6530 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6531 switch (XINT (x, 1))
6534 fputs ("@GOT", file);
6537 fputs ("@GOTOFF", file);
6539 case UNSPEC_GOTPCREL:
6540 fputs ("@GOTPCREL(%rip)", file);
6542 case UNSPEC_GOTTPOFF:
6543 /* FIXME: This might be @TPOFF in Sun ld too. */
6544 fputs ("@GOTTPOFF", file);
6547 fputs ("@TPOFF", file);
6551 fputs ("@TPOFF", file);
6553 fputs ("@NTPOFF", file);
6556 fputs ("@DTPOFF", file);
6558 case UNSPEC_GOTNTPOFF:
6560 fputs ("@GOTTPOFF(%rip)", file);
6562 fputs ("@GOTNTPOFF", file);
6564 case UNSPEC_INDNTPOFF:
6565 fputs ("@INDNTPOFF", file);
6568 output_operand_lossage ("invalid UNSPEC as operand");
6574 output_operand_lossage ("invalid expression as operand");
6578 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6579 We need to handle our special PIC relocations. */
6582 i386_dwarf_output_addr_const (file, x)
/* Emit a pointer-sized data directive (.quad on 64-bit, .long on
   32-bit), then the constant — through output_pic_addr_const so PIC
   relocation suffixes are printed correctly.  */
6587 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6591 fprintf (file, "%s", ASM_LONG);
6594 output_pic_addr_const (file, x, '\0');
6596 output_addr_const (file, x);
6600 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6601 We need to emit DTP-relative relocations. */
6604 i386_output_dwarf_dtprel (file, size, x)
6609 fputs (ASM_LONG, file);
6610 output_addr_const (file, x);
6611 fputs ("@DTPOFF", file);
/* Pad with a zero word — presumably the 8-byte (SIZE==8) case, where
   the DTPOFF relocation only fills the low word; confirm against the
   elided switch on SIZE above.  */
6617 fputs (", 0", file);
6624 /* In the name of slightly smaller debug output, and to cater to
6625 general assembler losage, recognize PIC+GOTOFF and turn it back
6626 into a direct symbol reference. */
6629 ix86_delegitimize_address (orig_x)
/* Strip the MEM wrapper (if any) and undo the PIC address forms built
   by legitimize_pic_address, returning the bare symbol (plus any
   register/offset parts).  Returns ORIG_X unchanged on no match
   (elided fallthrough paths).  */
6634 if (GET_CODE (x) == MEM)
/* 64-bit: (const (unspec [sym] UNSPEC_GOTPCREL)) inside a MEM.  */
6639 if (GET_CODE (x) != CONST
6640 || GET_CODE (XEXP (x, 0)) != UNSPEC
6641 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6642 || GET_CODE (orig_x) != MEM)
6644 return XVECEXP (XEXP (x, 0), 0, 0);
6647 if (GET_CODE (x) != PLUS
6648 || GET_CODE (XEXP (x, 1)) != CONST)
6651 if (GET_CODE (XEXP (x, 0)) == REG
6652 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6653 /* %ebx + GOT/GOTOFF */
6655 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6657 /* %ebx + %reg * scale + GOT/GOTOFF */
6659 if (GET_CODE (XEXP (y, 0)) == REG
6660 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6662 else if (GET_CODE (XEXP (y, 1)) == REG
6663 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
/* The non-PIC side must be a plain index term.  */
6667 if (GET_CODE (y) != REG
6668 && GET_CODE (y) != MULT
6669 && GET_CODE (y) != ASHIFT)
6675 x = XEXP (XEXP (x, 1), 0);
/* @GOT references only occur inside a MEM; @GOTOFF only outside.  */
6676 if (GET_CODE (x) == UNSPEC
6677 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6678 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6681 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6682 return XVECEXP (x, 0, 0);
/* Same, with an additional integer offset folded into the CONST.  */
6685 if (GET_CODE (x) == PLUS
6686 && GET_CODE (XEXP (x, 0)) == UNSPEC
6687 && GET_CODE (XEXP (x, 1)) == CONST_INT
6688 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6689 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6690 && GET_CODE (orig_x) != MEM)))
6692 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6694 return gen_rtx_PLUS (Pmode, y, x);
/* Write to FILE the instruction-suffix spelling of comparison CODE
   under condition-register mode MODE.  REVERSE inverts the condition;
   FP selects the fcmov-style spellings (e.g. "nbe" instead of "a").  */
6702 put_condition_code (code, mode, reverse, fp, file)
6704 enum machine_mode mode;
/* FP compares that need a second branch or a bypass cannot be
   expressed as one suffix; map to the integer-flag equivalent.  */
6710 if (mode == CCFPmode || mode == CCFPUmode)
6712 enum rtx_code second_code, bypass_code;
6713 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6714 if (bypass_code != NIL || second_code != NIL)
6716 code = ix86_fp_compare_code_to_integer (code);
6720 code = reverse_condition (code);
6731 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6736 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
6737 Those same assemblers have the same but opposite losage on cmov. */
6740 suffix = fp ? "nbe" : "a";
6743 if (mode == CCNOmode || mode == CCGOCmode)
6745 else if (mode == CCmode || mode == CCGCmode)
6756 if (mode == CCNOmode || mode == CCGOCmode)
6758 else if (mode == CCmode || mode == CCGCmode)
6767 suffix = fp ? "nb" : "ae";
6770 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6780 suffix = fp ? "u" : "p";
6783 suffix = fp ? "nu" : "np";
6788 fputs (suffix, file);
/* Print the assembler name of register X to FILE.  CODE selects the
   size variant ('b' byte, 'w' word, 'k' SImode, 'q' DImode, 'y' st(0)
   naming, 'h' high byte); with no code the register's own mode size
   is used.  */
6792 print_reg (x, code, file)
/* Internal-only registers must never reach assembler output.  */
6797 if (REGNO (x) == ARG_POINTER_REGNUM
6798 || REGNO (x) == FRAME_POINTER_REGNUM
6799 || REGNO (x) == FLAGS_REG
6800 || REGNO (x) == FPSR_REG)
6803 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
/* Normalize CODE to a byte size; MMX registers always print by name.  */
6806 if (code == 'w' || MMX_REG_P (x))
6808 else if (code == 'b')
6810 else if (code == 'k')
6812 else if (code == 'q')
6814 else if (code == 'y')
6816 else if (code == 'h')
6819 code = GET_MODE_SIZE (GET_MODE (x));
6821 /* Irritatingly, AMD extended registers use different naming convention
6822 from the normal registers. */
6823 if (REX_INT_REG_P (x))
6830 error ("extended registers have no high halves");
6833 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6836 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6839 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6842 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6845 error ("unsupported operand size for extended register");
6853 if (STACK_TOP_P (x))
6855 fputs ("st(0)", file);
/* Integer registers of size 4/8 get an 'e'/'r' prefix (eax/rax).  */
6862 if (! ANY_FP_REG_P (x))
6863 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6867 fputs (hi_reg_name[REGNO (x)], file);
6870 fputs (qi_reg_name[REGNO (x)], file);
6873 fputs (qi_high_reg_name[REGNO (x)], file);
6880 /* Locate some local-dynamic symbol still in use by this function
6881 so that we can print its name in some tls_local_dynamic_base
6885 get_some_local_dynamic_name ()
/* Result is cached per function in cfun->machine->some_ld_name; the
   scan below fills it in as a side effect of the for_each_rtx
   callback get_some_local_dynamic_name_1.  */
6889 if (cfun->machine->some_ld_name)
6890 return cfun->machine->some_ld_name;
6892 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6894 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6895 return cfun->machine->some_ld_name;
/* for_each_rtx callback: if *PX is a local-dynamic TLS SYMBOL_REF,
   record its name in cfun->machine->some_ld_name (stopping the walk
   via the elided nonzero return).  */
6901 get_some_local_dynamic_name_1 (px, data)
6903 void *data ATTRIBUTE_UNUSED;
6907 if (GET_CODE (x) == SYMBOL_REF
6908 && local_dynamic_symbolic_operand (x, Pmode))
6910 cfun->machine->some_ld_name = XSTR (x, 0);
6918 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6919 C -- print opcode suffix for set/cmov insn.
6920 c -- like C, but print reversed condition
6921 F,f -- likewise, but for floating-point.
6922 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
6924 R -- print the prefix for register names.
6925 z -- print the opcode suffix for the size of the current operand.
6926 * -- print a star (in certain assembler syntax)
6927 A -- print an absolute memory reference.
6928 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6929 s -- print a shift double count, followed by the assemblers argument
6931 b -- print the QImode name of the register for the indicated operand.
6932 %b0 would print %al if operands[0] is reg 0.
6933 w -- likewise, print the HImode name of the register.
6934 k -- likewise, print the SImode name of the register.
6935 q -- likewise, print the DImode name of the register.
6936 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6937 y -- print "st(0)" instead of "st" as a register.
6938 D -- print condition for SSE cmp instruction.
6939 P -- if PIC, print an @PLT suffix.
6940 X -- don't print any sort of PIC '@' suffix for a symbol.
6941 & -- print some in-use local-dynamic symbol name.
/* Print operand X to FILE.  CODE is one of the modifier characters
   documented above (0 means no modifier); the first switch handles
   modifiers, the tail handles the operand itself by rtx class.  */
6945 print_operand (file, x, code)
6955 if (ASSEMBLER_DIALECT == ASM_ATT)
6960 assemble_name (file, get_some_local_dynamic_name ());
6964 if (ASSEMBLER_DIALECT == ASM_ATT)
6966 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6968 /* Intel syntax. For absolute addresses, registers should not
6969 be surrounded by braces. */
6970 if (GET_CODE (x) != REG)
6973 PRINT_OPERAND (file, x, 0);
6981 PRINT_OPERAND (file, x, 0);
6986 if (ASSEMBLER_DIALECT == ASM_ATT)
6991 if (ASSEMBLER_DIALECT == ASM_ATT)
6996 if (ASSEMBLER_DIALECT == ASM_ATT)
7001 if (ASSEMBLER_DIALECT == ASM_ATT)
7006 if (ASSEMBLER_DIALECT == ASM_ATT)
7011 if (ASSEMBLER_DIALECT == ASM_ATT)
7016 /* 387 opcodes don't get size suffixes if the operands are
7018 if (STACK_REG_P (x))
7021 /* Likewise if using Intel opcodes. */
7022 if (ASSEMBLER_DIALECT == ASM_INTEL)
7025 /* This is the size of op from size of operand. */
7026 switch (GET_MODE_SIZE (GET_MODE (x)))
7029 #ifdef HAVE_GAS_FILDS_FISTS
7035 if (GET_MODE (x) == SFmode)
7050 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7052 #ifdef GAS_MNEMONICS
/* 's': shift-double count; omitted entirely when the assembler
   supplies an implicit %cl count (SHIFT_DOUBLE_OMITS_COUNT).  */
7078 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7080 PRINT_OPERAND (file, x, 0);
7086 /* Little bit of braindamage here. The SSE compare instructions
7087 does use completely different names for the comparisons that the
7088 fp conditional moves. */
7089 switch (GET_CODE (x))
7104 fputs ("unord", file);
7108 fputs ("neq", file);
7112 fputs ("nlt", file);
7116 fputs ("nle", file);
7119 fputs ("ord", file);
7127 #ifdef CMOV_SUN_AS_SYNTAX
7128 if (ASSEMBLER_DIALECT == ASM_ATT)
7130 switch (GET_MODE (x))
7132 case HImode: putc ('w', file); break;
7134 case SFmode: putc ('l', file); break;
7136 case DFmode: putc ('q', file); break;
7144 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7147 #ifdef CMOV_SUN_AS_SYNTAX
7148 if (ASSEMBLER_DIALECT == ASM_ATT)
7151 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7154 /* Like above, but reverse condition */
7156 /* Check to see if argument to %c is really a constant
7157 and not a condition code which needs to be reversed. */
7158 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
7160 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7163 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7166 #ifdef CMOV_SUN_AS_SYNTAX
7167 if (ASSEMBLER_DIALECT == ASM_ATT)
7170 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
/* Branch prediction hint prefixes, only when the profile says the
   static (forward-not-taken) heuristic would guess wrong.  */
7176 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7179 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7182 int pred_val = INTVAL (XEXP (x, 0));
7184 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7185 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7187 int taken = pred_val > REG_BR_PROB_BASE / 2;
7188 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7190 /* Emit hints only in the case default branch prediction
7191 heuristics would fail. */
7192 if (taken != cputaken)
7194 /* We use 3e (DS) prefix for taken branches and
7195 2e (CS) prefix for not taken branches. */
7197 fputs ("ds ; ", file);
7199 fputs ("cs ; ", file);
7206 output_operand_lossage ("invalid operand code `%c'", code);
7210 if (GET_CODE (x) == REG)
7212 PRINT_REG (x, code, file);
7215 else if (GET_CODE (x) == MEM)
7217 /* No `byte ptr' prefix for call instructions. */
7218 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7221 switch (GET_MODE_SIZE (GET_MODE (x)))
7223 case 1: size = "BYTE"; break;
7224 case 2: size = "WORD"; break;
7225 case 4: size = "DWORD"; break;
7226 case 8: size = "QWORD"; break;
7227 case 12: size = "XWORD"; break;
7228 case 16: size = "XMMWORD"; break;
7233 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7236 else if (code == 'w')
7238 else if (code == 'k')
7242 fputs (" PTR ", file);
7246 if (flag_pic && CONSTANT_ADDRESS_P (x))
7247 output_pic_addr_const (file, x, code);
7248 /* Avoid (%rip) for call operands. */
7249 else if (CONSTANT_ADDRESS_P (x) && code == 'P'
7250 && GET_CODE (x) != CONST_INT)
7251 output_addr_const (file, x);
7252 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7253 output_operand_lossage ("invalid constraints for operand");
/* SFmode immediates are emitted as their raw 32-bit bit pattern.  */
7258 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7263 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7264 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7266 if (ASSEMBLER_DIALECT == ASM_ATT)
7268 fprintf (file, "0x%lx", l);
7271 /* These float cases don't actually occur as immediate operands. */
7272 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7276 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7277 fprintf (file, "%s", dstr);
7280 else if (GET_CODE (x) == CONST_DOUBLE
7281 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
7285 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7286 fprintf (file, "%s", dstr);
7293 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7295 if (ASSEMBLER_DIALECT == ASM_ATT)
7298 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7299 || GET_CODE (x) == LABEL_REF)
7301 if (ASSEMBLER_DIALECT == ASM_ATT)
7304 fputs ("OFFSET FLAT:", file);
7307 if (GET_CODE (x) == CONST_INT)
7308 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7310 output_pic_addr_const (file, x, code);
7312 output_addr_const (file, x);
7316 /* Print a memory operand whose address is ADDR. */
7319 print_operand_address (file, addr)
/* Decompose ADDR into base + index*scale + displacement with
   ix86_decompose_address, then print it in the current assembler
   dialect.  Thread-pointer UNSPECs print as a segment override.  */
7323 struct ix86_address parts;
7324 rtx base, index, disp;
7327 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
7329 if (ASSEMBLER_DIALECT == ASM_INTEL)
7330 fputs ("DWORD PTR ", file);
7331 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7334 fputs ("fs:0", file);
7336 fputs ("gs:0", file);
7340 if (! ix86_decompose_address (addr, &parts))
7344 index = parts.index;
7346 scale = parts.scale;
7348 if (!base && !index)
7350 /* Displacement only requires special attention. */
7352 if (GET_CODE (disp) == CONST_INT)
7354 if (ASSEMBLER_DIALECT == ASM_INTEL)
7356 if (USER_LABEL_PREFIX[0] == 0)
7358 fputs ("ds:", file);
7360 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
7363 output_pic_addr_const (file, addr, 0);
7365 output_addr_const (file, addr);
7367 /* Use one byte shorter RIP relative addressing for 64bit mode. */
7369 && ((GET_CODE (addr) == SYMBOL_REF
7370 && ! tls_symbolic_operand (addr, GET_MODE (addr)))
7371 || GET_CODE (addr) == LABEL_REF
7372 || (GET_CODE (addr) == CONST
7373 && GET_CODE (XEXP (addr, 0)) == PLUS
7374 && (GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
7375 || GET_CODE (XEXP (XEXP (addr, 0), 0)) == LABEL_REF)
7376 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)))
7377 fputs ("(%rip)", file);
/* AT&T syntax: disp(base,index,scale).  */
7381 if (ASSEMBLER_DIALECT == ASM_ATT)
7386 output_pic_addr_const (file, disp, 0);
7387 else if (GET_CODE (disp) == LABEL_REF)
7388 output_asm_label (disp);
7390 output_addr_const (file, disp);
7395 PRINT_REG (base, 0, file);
7399 PRINT_REG (index, 0, file);
7401 fprintf (file, ",%d", scale);
/* Intel syntax: sym[base+index*scale+offset].  */
7407 rtx offset = NULL_RTX;
7411 /* Pull out the offset of a symbol; print any symbol itself. */
7412 if (GET_CODE (disp) == CONST
7413 && GET_CODE (XEXP (disp, 0)) == PLUS
7414 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7416 offset = XEXP (XEXP (disp, 0), 1);
7417 disp = gen_rtx_CONST (VOIDmode,
7418 XEXP (XEXP (disp, 0), 0));
7422 output_pic_addr_const (file, disp, 0);
7423 else if (GET_CODE (disp) == LABEL_REF)
7424 output_asm_label (disp);
7425 else if (GET_CODE (disp) == CONST_INT)
7428 output_addr_const (file, disp);
7434 PRINT_REG (base, 0, file);
7437 if (INTVAL (offset) >= 0)
7439 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7443 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7450 PRINT_REG (index, 0, file);
7452 fprintf (file, "*%d", scale);
/* Target hook used by output_addr_const: print the TLS UNSPEC
   constants that generic code does not understand, as their assembler
   relocation suffixes.  Returns false (elided) for non-UNSPEC or
   unknown UNSPEC numbers so the caller can report the failure.  */
7460 output_addr_const_extra (file, x)
7466 if (GET_CODE (x) != UNSPEC)
7469 op = XVECEXP (x, 0, 0);
7470 switch (XINT (x, 1))
7472 case UNSPEC_GOTTPOFF:
7473 output_addr_const (file, op);
7474 /* FIXME: This might be @TPOFF in Sun ld. */
7475 fputs ("@GOTTPOFF", file);
7478 output_addr_const (file, op);
7479 fputs ("@TPOFF", file);
/* The TPOFF/NTPOFF split mirrors output_pic_addr_const: GNU TLS uses
   negative TP offsets (@NTPOFF), Sun TLS positive (@TPOFF).  */
7482 output_addr_const (file, op);
7484 fputs ("@TPOFF", file);
7486 fputs ("@NTPOFF", file);
7489 output_addr_const (file, op);
7490 fputs ("@DTPOFF", file);
7492 case UNSPEC_GOTNTPOFF:
7493 output_addr_const (file, op);
7495 fputs ("@GOTTPOFF(%rip)", file);
7497 fputs ("@GOTNTPOFF", file);
7499 case UNSPEC_INDNTPOFF:
7500 output_addr_const (file, op);
7501 fputs ("@INDNTPOFF", file);
7511 /* Split one or more DImode RTL references into pairs of SImode
7512 references. The RTL can be REG, offsettable MEM, integer constant, or
7513 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7514 split and "num" is its length. lo_half and hi_half are output arrays
7515 that parallel "operands". */
7518 split_di (operands, num, lo_half, hi_half)
7521 rtx lo_half[], hi_half[];
7525 rtx op = operands[num];
7527 /* simplify_subreg refuse to split volatile memory addresses,
7528 but we still have to handle it. */
7529 if (GET_CODE (op) == MEM)
/* MEM: address the low word at offset 0 and high word at offset 4
   (little-endian layout).  */
7531 lo_half[num] = adjust_address (op, SImode, 0);
7532 hi_half[num] = adjust_address (op, SImode, 4);
/* Non-MEM: use subregs; VOIDmode constants are treated as DImode.  */
7536 lo_half[num] = simplify_gen_subreg (SImode, op,
7537 GET_MODE (op) == VOIDmode
7538 ? DImode : GET_MODE (op), 0);
7539 hi_half[num] = simplify_gen_subreg (SImode, op,
7540 GET_MODE (op) == VOIDmode
7541 ? DImode : GET_MODE (op), 4);
7545 /* Split one or more TImode RTL references into pairs of DImode
7546 references. The RTL can be REG, offsettable MEM, integer constant, or
7547 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
7548 split and "num" is its length. lo_half and hi_half are output arrays
7549 that parallel "operands". */
7552 split_ti (operands, num, lo_half, hi_half)
7555 rtx lo_half[], hi_half[];
7559 rtx op = operands[num];
7561 /* simplify_subreg refuse to split volatile memory addresses, but we
7562 still have to handle it. */
7563 if (GET_CODE (op) == MEM)
/* MEM: low DImode word at offset 0, high at offset 8.  */
7565 lo_half[num] = adjust_address (op, DImode, 0);
7566 hi_half[num] = adjust_address (op, DImode, 8);
7570 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7571 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7576 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7577 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7578 is the expression of the binary operation. The output may either be
7579 emitted here, or returned to the caller, like all output_* functions.
7581 There is no guarantee that the operands are the same mode, as they
7582 might be within FLOAT or FLOAT_EXTEND expressions. */
7584 #ifndef SYSV386_COMPAT
7585 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7586 wants to fix the assemblers because that causes incompatibility
7587 with gcc. No-one wants to fix gcc because that causes
7588 incompatibility with assemblers... You can use the option of
7589 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7590 #define SYSV386_COMPAT 1
/* Assemble the mnemonic into BUF: an opcode stem chosen from
   operands[3]'s rtx code, plus a suffix/operand template chosen from
   the stack/memory positions of the operands.  SSE register operands
   take the scalar SSE spellings instead.  */
7594 output_387_binary_op (insn, operands)
7598 static char buf[30];
7601 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7603 #ifdef ENABLE_CHECKING
7604 /* Even if we do not want to check the inputs, this documents input
7605 constraints. Which helps in understanding the following code. */
7606 if (STACK_REG_P (operands[0])
7607 && ((REG_P (operands[1])
7608 && REGNO (operands[0]) == REGNO (operands[1])
7609 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7610 || (REG_P (operands[2])
7611 && REGNO (operands[0]) == REGNO (operands[2])
7612 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7613 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
/* Pick the opcode stem; integer-mode operands select the fi* forms
   (fiadd/fisub/fimul/fidiv, from the elided stem assignments).  */
7619 switch (GET_CODE (operands[3]))
7622 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7623 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7631 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7632 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7640 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7641 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7649 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7650 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
/* SSE: append the scalar-single/scalar-double operand template.  */
7664 if (GET_MODE (operands[0]) == SFmode)
7665 strcat (buf, "ss\t{%2, %0|%0, %2}");
7667 strcat (buf, "sd\t{%2, %0|%0, %2}");
7672 switch (GET_CODE (operands[3]))
/* Commutative ops: normalize so operands[0] == operands[1].  */
7676 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7678 rtx temp = operands[2];
7679 operands[2] = operands[1];
7683 /* know operands[0] == operands[1]. */
7685 if (GET_CODE (operands[2]) == MEM)
7691 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7693 if (STACK_TOP_P (operands[0]))
7694 /* How is it that we are storing to a dead operand[2]?
7695 Well, presumably operands[1] is dead too. We can't
7696 store the result to st(0) as st(0) gets popped on this
7697 instruction. Instead store to operands[2] (which I
7698 think has to be st(1)). st(1) will be popped later.
7699 gcc <= 2.8.1 didn't have this check and generated
7700 assembly code that the Unixware assembler rejected. */
7701 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7703 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7707 if (STACK_TOP_P (operands[0]))
7708 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7710 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
/* Non-commutative MINUS/DIV: the operand order matters, and the
   SYSV386_COMPAT templates below also paper over AT&T fsubr/fdivr
   direction bugs (see the comment at the #define above).  */
7715 if (GET_CODE (operands[1]) == MEM)
7721 if (GET_CODE (operands[2]) == MEM)
7727 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7730 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7731 derived assemblers, confusingly reverse the direction of
7732 the operation for fsub{r} and fdiv{r} when the
7733 destination register is not st(0). The Intel assembler
7734 doesn't have this brain damage. Read !SYSV386_COMPAT to
7735 figure out what the hardware really does. */
7736 if (STACK_TOP_P (operands[0]))
7737 p = "{p\t%0, %2|rp\t%2, %0}";
7739 p = "{rp\t%2, %0|p\t%0, %2}";
7741 if (STACK_TOP_P (operands[0]))
7742 /* As above for fmul/fadd, we can't store to st(0). */
7743 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7745 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7750 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7753 if (STACK_TOP_P (operands[0]))
7754 p = "{rp\t%0, %1|p\t%1, %0}";
7756 p = "{p\t%1, %0|rp\t%0, %1}";
7758 if (STACK_TOP_P (operands[0]))
7759 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7761 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7766 if (STACK_TOP_P (operands[0]))
7768 if (STACK_TOP_P (operands[1]))
7769 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7771 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7774 else if (STACK_TOP_P (operands[1]))
7777 p = "{\t%1, %0|r\t%0, %1}";
7779 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7785 p = "{r\t%2, %0|\t%0, %2}";
7787 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7800 /* Output code to initialize control word copies used by
7801 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
7802 is set to control word rounding downwards. */
/* NOTE(review): 0xc00 sets both RC bits, i.e. round-toward-zero
   (truncation) rather than round-down; the comment above appears to
   use "downwards" loosely — confirm against the trunc patterns.  */
7804 emit_i387_cw_initialization (normal, round_down)
7805 rtx normal, round_down;
7807 rtx reg = gen_reg_rtx (HImode);
/* Store the live control word into NORMAL, then derive ROUND_DOWN by
   setting the rounding-control bits in a scratch copy.  */
7809 emit_insn (gen_x86_fnstcw_1 (normal));
7810 emit_move_insn (reg, normal);
7811 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
/* Insert 0xc into bits 10-11 via insv to avoid a partial-register
   stall; otherwise OR in 0xc00 directly.  */
7813 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7815 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7816 emit_move_insn (round_down, reg);
7819 /* Output code for INSN to convert a float to a signed int. OPERANDS
7820 are the insn operands. The output may be [HSD]Imode and the input
7821 operand may be [SDX]Fmode. */
7824 output_fix_trunc (insn, operands)
/* Whether the value on top of the x87 stack dies in this insn, and
   whether the integer destination is DImode (which forces the popping
   fistp form, since there is no non-popping 64-bit store).  */
7828 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7829 int dimode_p = GET_MODE (operands[0]) == DImode;
7831 /* Jump through a hoop or two for DImode, since the hardware has no
7832 non-popping instruction. We used to do this a different way, but
7833 that was somewhat fragile and broke with post-reload splitters. */
7834 if (dimode_p && !stack_top_dies)
7835 output_asm_insn ("fld\t%y1", operands);
7837 if (!STACK_TOP_P (operands[1]))
7840 if (GET_CODE (operands[0]) != MEM)
/* Swap to the truncating control word (%3), do the store — popping
   (fistp) when the stack top dies or we duplicated it for DImode,
   non-popping (fist) otherwise — then restore the saved control
   word (%2).  */
7843 output_asm_insn ("fldcw\t%3", operands);
7844 if (stack_top_dies || dimode_p)
7845 output_asm_insn ("fistp%z0\t%0", operands);
7847 output_asm_insn ("fist%z0\t%0", operands);
7848 output_asm_insn ("fldcw\t%2", operands);
7853 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7854 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7855 when fucom should be used. */
7858 output_fp_compare (insn, operands, eflags_p, unordered_p)
7861 int eflags_p, unordered_p;
7864 rtx cmp_op0 = operands[0];
7865 rtx cmp_op1 = operands[1];
7866 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
7871 cmp_op1 = operands[2];
/* SSE path: compare sets EFLAGS directly via [u]comiss/[u]comisd;
   the unordered variants do not fault on QNaN.  */
7875 if (GET_MODE (operands[0]) == SFmode)
7877 return "ucomiss\t{%1, %0|%0, %1}";
7879 return "comiss\t{%1, %0|%0, %1}";
7882 return "ucomisd\t{%1, %0|%0, %1}";
7884 return "comisd\t{%1, %0|%0, %1}";
/* x87 path: operand 0 must be at the top of the register stack.  */
7887 if (! STACK_TOP_P (cmp_op0))
7890 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7892 if (STACK_REG_P (cmp_op1)
7894 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7895 && REGNO (cmp_op1) != FIRST_STACK_REG)
7897 /* If both the top of the 387 stack dies, and the other operand
7898 is also a stack register that dies, then this must be a
7899 `fcompp' float compare */
7903 /* There is no double popping fcomi variant. Fortunately,
7904 eflags is immune from the fstp's cc clobbering. */
7906 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7908 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7916 return "fucompp\n\tfnstsw\t%0";
7918 return "fcompp\n\tfnstsw\t%0";
7931 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7933 static const char * const alt[24] =
7945 "fcomi\t{%y1, %0|%0, %y1}",
7946 "fcomip\t{%y1, %0|%0, %y1}",
7947 "fucomi\t{%y1, %0|%0, %y1}",
7948 "fucomip\t{%y1, %0|%0, %y1}",
7955 "fcom%z2\t%y2\n\tfnstsw\t%0",
7956 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7957 "fucom%z2\t%y2\n\tfnstsw\t%0",
7958 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7960 "ficom%z2\t%y2\n\tfnstsw\t%0",
7961 "ficomp%z2\t%y2\n\tfnstsw\t%0",
/* Build the 4-bit index into ALT from the caller's flags, matching the
   encoding documented above (eflags_p in bit 3, integer-mode compare in
   bit 2, unordered in bit 1, popping form in bit 0).  */
7969 mask = eflags_p << 3;
7970 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7971 mask |= unordered_p << 1;
7972 mask |= stack_top_dies;
/* Emit one element of an addr_vec jump table: a pointer-sized reference
   to local label VALUE.  Presumably the elided condition selects
   ASM_QUAD for 64-bit targets — confirm against the missing lines.  */
7985 ix86_output_addr_vec_elt (file, value)
7989 const char *directive = ASM_LONG;
7994 directive = ASM_QUAD;
8000 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
/* Emit one element of a PIC-friendly addr_diff_vec jump table: the
   distance from label REL (or a GOT/Mach-O anchor) to label VALUE,
   chosen per target/assembler capability.  */
8004 ix86_output_addr_diff_elt (file, value, rel)
8009 fprintf (file, "%s%s%d-%s%d\n",
8010 ASM_LONG, LPREFIX, value, LPREFIX, rel);
/* @GOTOFF form when the assembler supports it in data sections.  */
8011 else if (HAVE_AS_GOTOFF_IN_DATA)
8012 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8014 else if (TARGET_MACHO)
8015 fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
8016 machopic_function_base_name () + 1);
/* Fallback: express the entry relative to the GOT symbol.  */
8019 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8020 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8023 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8027 ix86_expand_clear (dest)
8032 /* We play register width games, which are only valid after reload. */
8033 if (!reload_completed)
8036 /* Avoid HImode and its attendant prefix byte. */
8037 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8038 dest = gen_rtx_REG (SImode, REGNO (dest));
8040 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8042 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8043 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
/* The xor form clobbers the flags register (hard reg 17 here), so wrap
   the SET and the CLOBBER together in a PARALLEL.  */
8045 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8046 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8052 /* X is an unchanging MEM. If it is a constant pool reference, return
8053 the constant pool rtx, else NULL. */
8056 maybe_get_pool_constant (x)
/* Strip any PIC/GOT wrapping from the address first, then test whether
   what remains is a symbol that addresses the constant pool.  */
8059 x = ix86_delegitimize_address (XEXP (x, 0));
8061 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8062 return get_pool_constant (x);
/* Expand a scalar move of mode MODE between operands[0] and operands[1],
   massaging TLS, PIC/Mach-O symbolic addresses, pushes, and FP/large
   constants into forms the move patterns accept.  */
8068 ix86_expand_move (mode, operands)
8069 enum machine_mode mode;
8072 int strict = (reload_in_progress || reload_completed);
8073 rtx insn, op0, op1, tmp;
/* TLS symbols must go through the TLS legitimizer; a mem destination
   additionally needs the value staged through a register.  */
8078 if (tls_symbolic_operand (op1, Pmode))
8080 op1 = legitimize_address (op1, op1, VOIDmode);
8081 if (GET_CODE (op0) == MEM)
8083 tmp = gen_reg_rtx (mode);
8084 emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
/* PIC references to symbols need legitimizing; Darwin (Mach-O) has its
   own indirection scheme handled first.  */
8088 else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8093 rtx temp = ((reload_in_progress
8094 || ((op0 && GET_CODE (op0) == REG)
8096 ? op0 : gen_reg_rtx (Pmode));
8097 op1 = machopic_indirect_data_reference (op1, temp);
8098 op1 = machopic_legitimize_pic_address (op1, mode,
8099 temp == op1 ? 0 : temp);
8103 if (MACHOPIC_INDIRECT)
8104 op1 = machopic_indirect_data_reference (op1, 0);
8108 insn = gen_rtx_SET (VOIDmode, op0, op1);
8112 #endif /* TARGET_MACHO */
8113 if (GET_CODE (op0) == MEM)
8114 op1 = force_reg (Pmode, op1);
8118 if (GET_CODE (temp) != REG)
8119 temp = gen_reg_rtx (Pmode);
8120 temp = legitimize_pic_address (op1, temp);
/* Disallow mem->mem moves (except real pushes) by loading the source
   into a register first.  */
8128 if (GET_CODE (op0) == MEM
8129 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8130 || !push_operand (op0, mode))
8131 && GET_CODE (op1) == MEM)
8132 op1 = force_reg (mode, op1);
8134 if (push_operand (op0, mode)
8135 && ! general_no_elim_operand (op1, mode))
8136 op1 = copy_to_mode_reg (mode, op1);
8138 /* Force large constants in 64bit compilation into register
8139 to get them CSEed. */
8140 if (TARGET_64BIT && mode == DImode
8141 && immediate_operand (op1, mode)
8142 && !x86_64_zero_extended_value (op1)
8143 && !register_operand (op0, mode)
8144 && optimize && !reload_completed && !reload_in_progress)
8145 op1 = copy_to_mode_reg (mode, op1)
8147 if (FLOAT_MODE_P (mode))
8149 /* If we are loading a floating point constant to a register,
8150 force the value to memory now, since we'll get better code
8151 out the back end. */
8155 else if (GET_CODE (op1) == CONST_DOUBLE
8156 && register_operand (op0, mode))
8157 op1 = validize_mem (force_const_mem (mode, op1));
8161 insn = gen_rtx_SET (VOIDmode, op0, op1);
/* Expand a vector-mode move of operands[1] into operands[0], forcing
   awkward operands (non-zero constants, mem->mem) into legal shape.  */
8167 ix86_expand_vector_move (mode, operands)
8168 enum machine_mode mode;
8171 /* Force constants other than zero into memory. We do not know how
8172 the instructions used to build constants modify the upper 64 bits
8173 of the register, once we have that information we may be able
8174 to handle some of them more efficiently. */
8175 if ((reload_in_progress | reload_completed) == 0
8176 && register_operand (operands[0], mode)
8177 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
8178 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
8180 /* Make operand1 a register if it isn't already. */
8182 && !register_operand (operands[0], mode)
8183 && !register_operand (operands[1], mode))
8185 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8186 emit_move_insn (operands[0], temp);
/* Operands are now in legal shape; emit the move as a plain SET.  */
8190 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8193 /* Attempt to expand a binary operator. Make the expansion closer to the
8194 actual machine, then just general_operand, which will allow 3 separate
8195 memory references (one output, two input) in a single insn. */
8198 ix86_expand_binary_operator (code, mode, operands)
8200 enum machine_mode mode;
8203 int matching_memory;
8204 rtx src1, src2, dst, op, clob;
8210 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8211 if (GET_RTX_CLASS (code) == 'c'
8212 && (rtx_equal_p (dst, src2)
8213 || immediate_operand (src1, mode)))
8220 /* If the destination is memory, and we do not have matching source
8221 operands, do things in registers. */
8222 matching_memory = 0;
8223 if (GET_CODE (dst) == MEM)
8225 if (rtx_equal_p (dst, src1))
8226 matching_memory = 1;
8227 else if (GET_RTX_CLASS (code) == 'c'
8228 && rtx_equal_p (dst, src2))
8229 matching_memory = 2;
8231 dst = gen_reg_rtx (mode);
8234 /* Both source operands cannot be in memory. */
8235 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8237 if (matching_memory != 2)
8238 src2 = force_reg (mode, src2);
8240 src1 = force_reg (mode, src1);
8243 /* If the operation is not commutable, source 1 cannot be a constant
8244 or non-matching memory. */
8245 if ((CONSTANT_P (src1)
8246 || (!matching_memory && GET_CODE (src1) == MEM))
8247 && GET_RTX_CLASS (code) != 'c')
8248 src1 = force_reg (mode, src1);
8250 /* If optimizing, copy to regs to improve CSE */
8251 if (optimize && ! no_new_pseudos)
8253 if (GET_CODE (dst) == MEM)
8254 dst = gen_reg_rtx (mode);
8255 if (GET_CODE (src1) == MEM)
8256 src1 = force_reg (mode, src1);
8257 if (GET_CODE (src2) == MEM)
8258 src2 = force_reg (mode, src2);
8261 /* Emit the instruction. */
8263 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8264 if (reload_in_progress)
8266 /* Reload doesn't know about the flags register, and doesn't know that
8267 it doesn't want to clobber it. We can only do this with PLUS. */
/* Normal case: attach the EFLAGS clobber every arithmetic insn has.  */
8274 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8275 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8278 /* Fix up the destination if needed. */
8279 if (dst != operands[0])
8280 emit_move_insn (operands[0], dst);
8283 /* Return TRUE or FALSE depending on whether the binary operator meets the
8284 appropriate constraints. */
8287 ix86_binary_operator_ok (code, mode, operands)
8289 enum machine_mode mode ATTRIBUTE_UNUSED;
8292 /* Both source operands cannot be in memory. */
8293 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8295 /* If the operation is not commutable, source 1 cannot be a constant. */
8296 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
8298 /* If the destination is memory, we must have a matching source operand. */
8299 if (GET_CODE (operands[0]) == MEM
8300 && ! (rtx_equal_p (operands[0], operands[1])
8301 || (GET_RTX_CLASS (code) == 'c'
8302 && rtx_equal_p (operands[0], operands[2]))))
8304 /* If the operation is not commutable and the source 1 is memory, we must
8305 have a matching destination. */
8306 if (GET_CODE (operands[1]) == MEM
8307 && GET_RTX_CLASS (code) != 'c'
8308 && ! rtx_equal_p (operands[0], operands[1]))
8313 /* Attempt to expand a unary operator. Make the expansion closer to the
8314 actual machine, then just general_operand, which will allow 2 separate
8315 memory references (one output, one input) in a single insn. */
8318 ix86_expand_unary_operator (code, mode, operands)
8320 enum machine_mode mode;
8323 int matching_memory;
8324 rtx src, dst, op, clob;
8329 /* If the destination is memory, and we do not have matching source
8330 operands, do things in registers. */
8331 matching_memory = 0;
8332 if (GET_CODE (dst) == MEM)
8334 if (rtx_equal_p (dst, src))
8335 matching_memory = 1;
8337 dst = gen_reg_rtx (mode);
8340 /* When source operand is memory, destination must match. */
8341 if (!matching_memory && GET_CODE (src) == MEM)
8342 src = force_reg (mode, src);
8344 /* If optimizing, copy to regs to improve CSE */
8345 if (optimize && ! no_new_pseudos)
8347 if (GET_CODE (dst) == MEM)
8348 dst = gen_reg_rtx (mode);
8349 if (GET_CODE (src) == MEM)
8350 src = force_reg (mode, src);
8353 /* Emit the instruction. */
8355 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
/* NOT is the one unary op that does not clobber flags, so it is emitted
   bare in this branch; other ops get the EFLAGS clobber below.  */
8356 if (reload_in_progress || code == NOT)
8358 /* Reload doesn't know about the flags register, and doesn't know that
8359 it doesn't want to clobber it. */
8366 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8367 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8370 /* Fix up the destination if needed. */
8371 if (dst != operands[0])
8372 emit_move_insn (operands[0], dst);
8375 /* Return TRUE or FALSE depending on whether the unary operator meets the
8376 appropriate constraints. */
8379 ix86_unary_operator_ok (code, mode, operands)
8380 enum rtx_code code ATTRIBUTE_UNUSED;
8381 enum machine_mode mode ATTRIBUTE_UNUSED;
8382 rtx operands[2] ATTRIBUTE_UNUSED;
8384 /* If one of operands is memory, source and destination must match. */
8385 if ((GET_CODE (operands[0]) == MEM
8386 || GET_CODE (operands[1]) == MEM)
8387 && ! rtx_equal_p (operands[0], operands[1]))
8392 /* Return TRUE or FALSE depending on whether the first SET in INSN
8393 has source and destination with matching CC modes, and that the
8394 CC mode is at least as constrained as REQ_MODE. */
8397 ix86_match_ccmode (insn, req_mode)
8399 enum machine_mode req_mode;
8402 enum machine_mode set_mode;
/* Dig the first SET out of a possibly-PARALLEL pattern and make sure it
   is a flags-setting COMPARE.  */
8404 set = PATTERN (insn);
8405 if (GET_CODE (set) == PARALLEL)
8406 set = XVECEXP (set, 0, 0);
8407 if (GET_CODE (set) != SET)
8409 if (GET_CODE (SET_SRC (set)) != COMPARE)
8412 set_mode = GET_MODE (SET_DEST (set));
/* Per-mode admissibility checks: each case rejects requested modes that
   demand more flag bits than SET_MODE provides (elided switch bodies).  */
8416 if (req_mode != CCNOmode
8417 && (req_mode != CCmode
8418 || XEXP (SET_SRC (set), 1) != const0_rtx))
8422 if (req_mode == CCGCmode)
8426 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8430 if (req_mode == CCZmode)
8440 return (GET_MODE (SET_SRC (set)) == set_mode);
8443 /* Generate insn patterns to do an integer compare of OPERANDS. */
8446 ix86_expand_int_compare (code, op0, op1)
8450 enum machine_mode cmpmode;
/* Pick the least-constrained CC mode for this comparison and emit
   flags = COMPARE (op0, op1).  */
8453 cmpmode = SELECT_CC_MODE (code, op0, op1);
8454 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8456 /* This is very simple, but making the interface the same as in the
8457 FP case makes the rest of the code easier. */
8458 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8459 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8461 /* Return the test that should be put into the flags user, i.e.
8462 the bcc, scc, or cmov instruction. */
8463 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8466 /* Figure out whether to use ordered or unordered fp comparisons.
8467 Return the appropriate mode to use. */
8470 ix86_fp_compare_mode (code)
8471 enum rtx_code code ATTRIBUTE_UNUSED;
8473 /* ??? In order to make all comparisons reversible, we do all comparisons
8474 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8475 all forms trapping and nontrapping comparisons, we can make inequality
8476 comparisons trapping again, since it results in better code when using
8477 FCOM based compares. */
8478 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* Select the condition-code mode needed to compare OP0 against OP1 with
   comparison CODE; FP operands delegate to ix86_fp_compare_mode.  The
   switch case bodies (the returned CC modes) are elided from this view.  */
8482 ix86_cc_mode (code, op0, op1)
8486 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8487 return ix86_fp_compare_mode (code);
8490 /* Only zero flag is needed. */
8492 case NE: /* ZF!=0 */
8494 /* Codes needing carry flag. */
8495 case GEU: /* CF=0 */
8496 case GTU: /* CF=0 & ZF=0 */
8497 case LTU: /* CF=1 */
8498 case LEU: /* CF=1 | ZF=1 */
8500 /* Codes possibly doable only with sign flag when
8501 comparing against zero. */
8502 case GE: /* SF=OF or SF=0 */
8503 case LT: /* SF<>OF or SF=1 */
8504 if (op1 == const0_rtx)
8507 /* For other cases Carry flag is not required. */
8509 /* Codes doable only with sign flag when comparing
8510 against zero, but we miss jump instruction for it
8511 so we need to use relational tests against overflow
8512 that thus needs to be zero. */
8513 case GT: /* ZF=0 & SF=OF */
8514 case LE: /* ZF=1 | SF<>OF */
8515 if (op1 == const0_rtx)
8519 /* strcmp pattern do (use flags) and combine may ask us for proper
8528 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8531 ix86_use_fcomi_compare (code)
8532 enum rtx_code code ATTRIBUTE_UNUSED;
/* fcomi is chosen when it is the cheapest strategy for either the
   comparison as written or its swapped form (the swap may be applied
   later by ix86_prepare_fp_compare_args).  */
8534 enum rtx_code swapped_code = swap_condition (code);
8535 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8536 || (ix86_fp_comparison_cost (swapped_code)
8537 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8540 /* Swap, force into registers, or otherwise massage the two operands
8541 to a fp comparison. The operands are updated in place; the new
8542 comparison code is returned. */
8544 static enum rtx_code
8545 ix86_prepare_fp_compare_args (code, pop0, pop1)
8549 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8550 rtx op0 = *pop0, op1 = *pop1;
8551 enum machine_mode op_mode = GET_MODE (op0);
8552 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8554 /* All of the unordered compare instructions only work on registers.
8555 The same is true of the XFmode compare instructions. The same is
8556 true of the fcomi compare instructions. */
8559 && (fpcmp_mode == CCFPUmode
8560 || op_mode == XFmode
8561 || op_mode == TFmode
8562 || ix86_use_fcomi_compare (code)))
8564 op0 = force_reg (op_mode, op0);
8565 op1 = force_reg (op_mode, op1);
8569 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8570 things around if they appear profitable, otherwise force op0
/* Swap when op0 is an awkward operand (non-standard constant or memory)
   and op1 is not, flipping the comparison code to compensate.  */
8573 if (standard_80387_constant_p (op0) == 0
8574 || (GET_CODE (op0) == MEM
8575 && ! (standard_80387_constant_p (op1) == 0
8576 || GET_CODE (op1) == MEM)))
8579 tmp = op0, op0 = op1, op1 = tmp;
8580 code = swap_condition (code);
8583 if (GET_CODE (op0) != REG)
8584 op0 = force_reg (op_mode, op0);
8586 if (CONSTANT_P (op1))
/* Constants the 387 can materialize directly (fldz/fld1 etc.) go in a
   register; anything else is spilled to the constant pool.  */
8588 if (standard_80387_constant_p (op1))
8589 op1 = force_reg (op_mode, op1);
8591 op1 = validize_mem (force_const_mem (op_mode, op1));
8595 /* Try to rearrange the comparison to make it cheaper. */
8596 if (ix86_fp_comparison_cost (code)
8597 > ix86_fp_comparison_cost (swap_condition (code))
8598 && (GET_CODE (op1) == REG || !no_new_pseudos))
8601 tmp = op0, op0 = op1, op1 = tmp;
8602 code = swap_condition (code);
8603 if (GET_CODE (op0) != REG)
8604 op0 = force_reg (op_mode, op0);
8612 /* Convert comparison codes we use to represent FP comparison to integer
8613 code that will result in proper branch. Return UNKNOWN if no such code
/* NOTE(review): the body of this function (presumably a switch mapping
   FP codes like UNLT to integer codes like LTU) is elided here.  */
8615 static enum rtx_code
8616 ix86_fp_compare_code_to_integer (code)
8646 /* Split comparison code CODE into comparisons we can do using branch
8647 instructions. BYPASS_CODE is comparison code for branch that will
8648 branch around FIRST_CODE and SECOND_CODE. If some of branches
8649 is not required, set value to NIL.
8650 We never require more than two branches. */
8652 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
8653 enum rtx_code code, *bypass_code, *first_code, *second_code;
8659 /* The fcomi comparison sets flags as follows:
/* Codes in the first group map directly to a single flags test after
   fcomi/sahf; the remaining cases need an extra UNORDERED bypass or
   follow-up branch to get IEEE NaN behavior right.  */
8669 case GT: /* GTU - CF=0 & ZF=0 */
8670 case GE: /* GEU - CF=0 */
8671 case ORDERED: /* PF=0 */
8672 case UNORDERED: /* PF=1 */
8673 case UNEQ: /* EQ - ZF=1 */
8674 case UNLT: /* LTU - CF=1 */
8675 case UNLE: /* LEU - CF=1 | ZF=1 */
8676 case LTGT: /* EQ - ZF=0 */
8678 case LT: /* LTU - CF=1 - fails on unordered */
8680 *bypass_code = UNORDERED;
8682 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8684 *bypass_code = UNORDERED;
8686 case EQ: /* EQ - ZF=1 - fails on unordered */
8688 *bypass_code = UNORDERED;
8690 case NE: /* NE - ZF=0 - fails on unordered */
8692 *second_code = UNORDERED;
8694 case UNGE: /* GEU - CF=0 - fails on unordered */
8696 *second_code = UNORDERED;
8698 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8700 *second_code = UNORDERED;
/* Without strict IEEE semantics the NaN fixups can be dropped
   (elided code presumably clears bypass/second here).  */
8705 if (!TARGET_IEEE_FP)
8712 /* Return cost of comparison done fcom + arithmetics operations on AX.
8713 All following functions do use number of instructions as a cost metrics.
8714 In future this should be tweaked to compute bytes for optimize_size and
8715 take into account performance of various instructions on various CPUs. */
8717 ix86_fp_comparison_arithmetics_cost (code)
/* Non-IEEE mode skips the NaN bit-twiddling, so the sequence is shorter
   (per-code costs in the elided switch below).  */
8720 if (!TARGET_IEEE_FP)
8722 /* The cost of code output by ix86_expand_fp_compare. */
8750 /* Return cost of comparison done using fcomi operation.
8751 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8753 ix86_fp_comparison_fcomi_cost (code)
8756 enum rtx_code bypass_code, first_code, second_code;
8757 /* Return arbitrarily high cost when instruction is not supported - this
8758 prevents gcc from using it. */
/* Cost = fcomi + jump, plus one if a NaN bypass/second branch is needed.  */
8761 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8762 return (bypass_code != NIL || second_code != NIL) + 2;
8765 /* Return cost of comparison done using sahf operation.
8766 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8768 ix86_fp_comparison_sahf_cost (code)
8771 enum rtx_code bypass_code, first_code, second_code;
8772 /* Return arbitrarily high cost when instruction is not preferred - this
8773 avoids gcc from using it. */
8774 if (!TARGET_USE_SAHF && !optimize_size)
/* Cost = fnstsw + sahf + jump, plus one for any extra NaN branch.  */
8776 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8777 return (bypass_code != NIL || second_code != NIL) + 3;
8780 /* Compute cost of the comparison done using any method.
8781 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8783 ix86_fp_comparison_cost (code)
8786 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8789 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8790 sahf_cost = ix86_fp_comparison_sahf_cost (code);
/* Take the minimum of the three strategies (arithmetics, sahf, fcomi).  */
8792 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8793 if (min > sahf_cost)
8795 if (min > fcomi_cost)
8800 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8803 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
8805 rtx op0, op1, scratch;
8809 enum machine_mode fpcmp_mode, intcmp_mode;
8811 int cost = ix86_fp_comparison_cost (code);
8812 enum rtx_code bypass_code, first_code, second_code;
8814 fpcmp_mode = ix86_fp_compare_mode (code);
8815 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8818 *second_test = NULL_RTX;
8820 *bypass_test = NULL_RTX;
8822 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8824 /* Do fcomi/sahf based test when profitable. */
8825 if ((bypass_code == NIL || bypass_test)
8826 && (second_code == NIL || second_test)
8827 && ix86_fp_comparison_arithmetics_cost (code) > cost)
/* fcomi variant: the compare writes EFLAGS directly.  */
8831 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8832 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
/* sahf variant: fnstsw into a HImode scratch, then sahf moves AH
   into EFLAGS.  */
8838 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8839 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8841 scratch = gen_reg_rtx (HImode);
8842 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8843 emit_insn (gen_x86_sahf_1 (scratch));
8846 /* The FP codes work out to act like unsigned. */
8847 intcmp_mode = fpcmp_mode;
8849 if (bypass_code != NIL)
8850 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8851 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8853 if (second_code != NIL)
8854 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8855 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8860 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8861 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8862 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8864 scratch = gen_reg_rtx (HImode);
8865 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8867 /* In the unordered case, we have to check C2 for NaN's, which
8868 doesn't happen to work out to anything nice combination-wise.
8869 So do some bit twiddling on the value we've got in AH to come
8870 up with an appropriate set of condition codes. */
8872 intcmp_mode = CCNOmode;
/* The masks below select x87 status-word condition bits as seen in AH:
   0x45 = C0|C2|C3, 0x40 = C3, 0x05 = C0|C2, 0x04 = C2, 0x01 = C0.
   Each comparison code gets its own twiddle; IEEE mode needs the
   extra C2 (NaN) handling.  */
8877 if (code == GT || !TARGET_IEEE_FP)
8879 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8884 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8885 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8886 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8887 intcmp_mode = CCmode;
8893 if (code == LT && TARGET_IEEE_FP)
8895 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8896 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8897 intcmp_mode = CCmode;
8902 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8908 if (code == GE || !TARGET_IEEE_FP)
8910 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8915 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8916 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8923 if (code == LE && TARGET_IEEE_FP)
8925 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8926 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8927 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8928 intcmp_mode = CCmode;
8933 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8939 if (code == EQ && TARGET_IEEE_FP)
8941 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8942 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8943 intcmp_mode = CCmode;
8948 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8955 if (code == NE && TARGET_IEEE_FP)
8957 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8958 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8964 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8970 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8974 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8983 /* Return the test that should be put into the flags user, i.e.
8984 the bcc, scc, or cmov instruction. */
8985 return gen_rtx_fmt_ee (code, VOIDmode,
8986 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Expand the pending comparison (ix86_compare_op0/op1 set by the cmp
   expander) with code CODE, routing FP operands to the FP path.
   Returns the rtx test for the flags user; SECOND_TEST/BYPASS_TEST get
   any extra tests the FP path requires (NULL_RTX otherwise).  */
8991 ix86_expand_compare (code, second_test, bypass_test)
8993 rtx *second_test, *bypass_test;
8996 op0 = ix86_compare_op0;
8997 op1 = ix86_compare_op1;
9000 *second_test = NULL_RTX;
9002 *bypass_test = NULL_RTX;
9004 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9005 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9006 second_test, bypass_test);
9008 ret = ix86_expand_int_compare (code, op0, op1);
9013 /* Return true if the CODE will result in nontrivial jump sequence. */
9015 ix86_fp_jump_nontrivial_p (code)
9018 enum rtx_code bypass_code, first_code, second_code;
/* Nontrivial means the FP branch needs a NaN bypass or a second jump.  */
9021 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9022 return bypass_code != NIL || second_code != NIL;
/* Expand a conditional branch on the pending comparison to LABEL,
   dispatching on the mode of ix86_compare_op0: integer modes use a
   plain compare+jump, FP modes may need a compound pattern, and DImode
   (on 32-bit) is split into word-sized compares.  */
9026 ix86_expand_branch (code, label)
9032 switch (GET_MODE (ix86_compare_op0))
/* Simple integer case: compare, then jump-if.  */
9038 tmp = ix86_expand_compare (code, NULL, NULL);
9039 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9040 gen_rtx_LABEL_REF (VOIDmode, label),
9042 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp))
9052 enum rtx_code bypass_code, first_code, second_code;
9054 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9057 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9059 /* Check whether we will use the natural sequence with one jump. If
9060 so, we can expand jump early. Otherwise delay expansion by
9061 creating compound insn to not confuse optimizers. */
9062 if (bypass_code == NIL && second_code == NIL
9065 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9066 gen_rtx_LABEL_REF (VOIDmode, label),
/* Multi-jump FP case: wrap compare+branch in one PARALLEL with the
   clobbers of both FP flag registers (18 = FPSR copy, 17 = EFLAGS)
   and, for the fnstsw path, a HImode scratch.  */
9071 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9072 ix86_compare_op0, ix86_compare_op1);
9073 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9074 gen_rtx_LABEL_REF (VOIDmode, label),
9076 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9078 use_fcomi = ix86_use_fcomi_compare (code);
9079 vec = rtvec_alloc (3 + !use_fcomi);
9080 RTVEC_ELT (vec, 0) = tmp;
9082 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9084 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9087 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9089 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9097 /* Expand DImode branch into multiple compare+branch. */
9099 rtx lo[2], hi[2], label2;
9100 enum rtx_code code1, code2, code3;
/* Canonicalize: keep any constant as the second operand.  */
9102 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9104 tmp = ix86_compare_op0;
9105 ix86_compare_op0 = ix86_compare_op1;
9106 ix86_compare_op1 = tmp;
9107 code = swap_condition (code);
9109 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9110 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9112 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9113 avoid two branches. This costs one extra insn, so disable when
9114 optimizing for size. */
9116 if ((code == EQ || code == NE)
9118 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9123 if (hi[1] != const0_rtx)
9124 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9125 NULL_RTX, 0, OPTAB_WIDEN);
9128 if (lo[1] != const0_rtx)
9129 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9130 NULL_RTX, 0, OPTAB_WIDEN);
9132 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9133 NULL_RTX, 0, OPTAB_WIDEN);
9135 ix86_compare_op0 = tmp;
9136 ix86_compare_op1 = const0_rtx;
9137 ix86_expand_branch (code, label);
9141 /* Otherwise, if we are doing less-than or greater-or-equal-than,
9142 op1 is a constant and the low word is zero, then we can just
9143 examine the high word. */
9145 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9148 case LT: case LTU: case GE: case GEU:
9149 ix86_compare_op0 = hi[0];
9150 ix86_compare_op1 = hi[1];
9151 ix86_expand_branch (code, label);
9157 /* Otherwise, we need two or three jumps. */
9159 label2 = gen_label_rtx ();
9162 code2 = swap_condition (code);
9163 code3 = unsigned_condition (code);
9167 case LT: case GT: case LTU: case GTU:
9170 case LE: code1 = LT; code2 = GT; break;
9171 case GE: code1 = GT; code2 = LT; break;
9172 case LEU: code1 = LTU; code2 = GTU; break;
9173 case GEU: code1 = GTU; code2 = LTU; break;
9175 case EQ: code1 = NIL; code2 = NE; break;
9176 case NE: code2 = NIL; break;
9184 * if (hi(a) < hi(b)) goto true;
9185 * if (hi(a) > hi(b)) goto false;
9186 * if (lo(a) < lo(b)) goto true;
/* Emit high-word branches (code1 to LABEL, code2 to LABEL2), then the
   unsigned low-word branch, then fall through via LABEL2.  */
9190 ix86_compare_op0 = hi[0];
9191 ix86_compare_op1 = hi[1];
9194 ix86_expand_branch (code1, label);
9196 ix86_expand_branch (code2, label2);
9198 ix86_compare_op0 = lo[0];
9199 ix86_compare_op1 = lo[1];
9200 ix86_expand_branch (code3, label);
9203 emit_label (label2);
9212 /* Split branch based on floating point condition. */
9214 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
9216 rtx op1, op2, target1, target2, tmp;
9219 rtx label = NULL_RTX;
9221 int bypass_probability = -1, second_probability = -1, probability = -1;
/* Canonicalize so TARGET2 is the fall-through (pc); if not, reverse
   the condition with unordered-aware reversal and swap targets.  */
9224 if (target2 != pc_rtx)
9227 code = reverse_condition_maybe_unordered (code);
9232 condition = ix86_expand_fp_compare (code, op1, op2,
9233 tmp, &second, &bypass);
9235 if (split_branch_probability >= 0)
9237 /* Distribute the probabilities across the jumps.
9238 Assume the BYPASS and SECOND to be always test
9240 probability = split_branch_probability;
9242 /* Value of 1 is low enough to make no need for probability
9243 to be updated. Later we may run some experiments and see
9244 if unordered values are more frequent in practice. */
9246 bypass_probability = 1;
9248 second_probability = 1;
/* Optional bypass jump around the main branch for the NaN case.  */
9250 if (bypass != NULL_RTX)
9252 label = gen_label_rtx ();
9253 i = emit_jump_insn (gen_rtx_SET
9255 gen_rtx_IF_THEN_ELSE (VOIDmode,
9257 gen_rtx_LABEL_REF (VOIDmode,
9260 if (bypass_probability >= 0)
9262 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9263 GEN_INT (bypass_probability),
/* Main conditional jump, with its branch-probability note.  */
9266 i = emit_jump_insn (gen_rtx_SET
9268 gen_rtx_IF_THEN_ELSE (VOIDmode,
9269 condition, target1, target2)));
9270 if (probability >= 0)
9272 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9273 GEN_INT (probability),
/* Optional second jump required by some IEEE comparisons.  */
9275 if (second != NULL_RTX)
9277 i = emit_jump_insn (gen_rtx_SET
9279 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9281 if (second_probability >= 0)
9283 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9284 GEN_INT (second_probability),
9287 if (label != NULL_RTX)
/* Expand a setcc of CODE into DEST (a QImode destination).  Returns
   0 on FAIL (caller must fall back), 1 on DONE.  When the compare
   needs two flag tests (second_test/bypass_test from
   ix86_expand_compare), the two QImode setcc results are combined
   with AND (bypass: both must hold) or IOR (second: either holds).
   NOTE(review): elided excerpt — some lines between statements are
   missing from this view.  */
9292 ix86_expand_setcc (code, dest)
9296 rtx ret, tmp, tmpreg;
9297 rtx second_test, bypass_test;
/* DImode compares go through a split path not handled here.  */
9299 if (GET_MODE (ix86_compare_op0) == DImode
9301 return 0; /* FAIL */
9303 if (GET_MODE (dest) != QImode)
9306 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9307 PUT_MODE (ret, QImode);
9312 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9313 if (bypass_test || second_test)
9315 rtx test = second_test;
9317 rtx tmp2 = gen_reg_rtx (QImode);
/* For a bypass test the condition is reversed so the AND below
   expresses "main test AND NOT bypass".  */
9324 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9326 PUT_MODE (test, QImode);
9327 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9330 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9332 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9335 return 1; /* DONE */
9338 /* Expand comparison setting or clearing carry flag. Return true when successful
9339 and set pop for the operation. */
9341 ix86_expand_carry_flag_compare (code, op0, op1, pop)
/* The goal is to reduce CODE(op0,op1) to a single LTU/GEU test of the
   carry flag, so callers can use adc/sbb/cmov tricks.  Integer
   comparisons are rewritten (operand swap, constant adjustment) until
   they are LTU/GEU; FP comparisons are tried through
   ix86_expand_fp_compare and accepted only if they come out as a
   plain carry-flag test.  *POP receives the resulting compare rtx.
   NOTE(review): elided excerpt — FAIL returns and some braces are not
   visible here.  */
9345 enum machine_mode mode =
9346 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9348 /* Do not handle DImode compares that go trought special path. Also we can't
9349 deal with FP compares yet. This is possible to add. */
9350 if ((mode == DImode && !TARGET_64BIT))
9352 if (FLOAT_MODE_P (mode))
9354 rtx second_test = NULL, bypass_test = NULL;
9355 rtx compare_op, compare_seq;
9357 /* Shortcut: following common codes never translate into carry flag compares. */
9358 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9359 || code == ORDERED || code == UNORDERED)
9362 /* These comparisons require zero flag; swap operands so they won't. */
9363 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9369 code = swap_condition (code);
9372 /* Try to expand the comparsion and verify that we end up with carry flag
9373 based comparsion. This is fails to be true only when we decide to expand
9374 comparsion using arithmetic that is not too common scenario. */
9376 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9377 &second_test, &bypass_test);
9378 compare_seq = get_insns ();
/* Multi-test FP compares cannot be expressed as one carry test.  */
9381 if (second_test || bypass_test)
9383 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9384 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9385 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op))\
;
9387 code = GET_CODE (compare_op);
9388 if (code != LTU && code != GEU)
9390 emit_insn (compare_seq);
9394 if (!INTEGRAL_MODE_P (mode))
/* Integer path: rewrite CODE/op1 step by step into LTU or GEU.  */
9402 /* Convert a==0 into (unsigned)a<1. */
9405 if (op1 != const0_rtx)
9408 code = (code == EQ ? LTU : GEU);
9411 /* Convert a>b into b<a or a>=b-1. */
9414 if (GET_CODE (op1) == CONST_INT)
9416 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9417 /* Bail out on overflow. We still can swap operands but that
9418 would force loading of the constant into register. */
9419 if (op1 == const0_rtx
9420 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9422 code = (code == GTU ? GEU : LTU);
9429 code = (code == GTU ? LTU : GEU);
9433 /* Convert a>0 into (unsigned)a<0x7fffffff. */
9436 if (mode == DImode || op1 != const0_rtx)
9438 op1 = gen_int_mode (~(1 << (GET_MODE_BITSIZE (mode) - 1)), mode);
9439 code = (code == LT ? GEU : LTU);
9443 if (mode == DImode || op1 != constm1_rtx)
9445 op1 = gen_int_mode (~(1 << (GET_MODE_BITSIZE (mode) - 1)), mode);
9446 code = (code == LE ? GEU : LTU);
/* Emit the rewritten compare and hand the LTU/GEU rtx to the caller.  */
9452 ix86_compare_op0 = op0;
9453 ix86_compare_op1 = op1;
9454 *pop = ix86_expand_compare (code, NULL, NULL);
9455 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
/* Expand an integer conditional move: operands[0] = operands[1](cmp)
   ? operands[2] : operands[3].  Returns 1 (DONE) when a sequence was
   emitted, 0 (FAIL) when the caller must fall back to generic code.
   Strategy, in order: carry-flag / sign-bit arithmetic tricks for
   constant arms, lea-based encodings for small constant differences,
   setcc+and for branch-costly targets, masking tricks for 0/-1 arms,
   and finally a real cmov.
   NOTE(review): elided excerpt — many lines (braces, else-arms, the
   `diff = ct - cf` computation, some locals) are missing from view;
   comments describe only visible statements.  */
9461 ix86_expand_int_movcc (operands)
9464 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9465 rtx compare_seq, compare_op;
9466 rtx second_test, bypass_test;
9467 enum machine_mode mode = GET_MODE (operands[0]);
9468 bool sign_bit_compare_p = false;;
9471 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9472 compare_seq = get_insns ();
9475 compare_code = GET_CODE (compare_op);
/* x < 0 / x >= 0 (and the constm1 equivalents) test only the sign bit
   and can be done with an arithmetic shift instead of a compare.  */
9477 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9478 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9479 sign_bit_compare_p = true;
9481 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9482 HImode insns, we'd be swallowed in word prefix ops. */
9484 if ((mode != HImode || TARGET_FAST_PREFIX)
9485 && (mode != DImode || TARGET_64BIT)
9486 && GET_CODE (operands[2]) == CONST_INT
9487 && GET_CODE (operands[3]) == CONST_INT)
9489 rtx out = operands[0];
9490 HOST_WIDE_INT ct = INTVAL (operands[2]);
9491 HOST_WIDE_INT cf = INTVAL (operands[3]);
9495 /* Sign bit compares are better done using shifts than we do by using
9497 if (sign_bit_compare_p
9498 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9499 ix86_compare_op1, &compare_op))
9501 /* Detect overlap between destination and compare sources. */
9504 if (!sign_bit_compare_p)
9508 compare_code = GET_CODE (compare_op);
9510 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9511 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9514 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9517 /* To simplify rest of code, restrict to the GEU case. */
9518 if (compare_code == LTU)
9520 HOST_WIDE_INT tmp = ct;
9523 compare_code = reverse_condition (compare_code);
9524 code = reverse_condition (code);
9529 PUT_CODE (compare_op,
9530 reverse_condition_maybe_unordered
9531 (GET_CODE (compare_op)));
9533 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9537 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9538 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9539 tmp = gen_reg_rtx (mode);
/* Materialize 0/-1 from the carry flag (sbb reg,reg idiom).  */
9542 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9544 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9548 if (code == GT || code == GE)
9549 code = reverse_condition (code);
9552 HOST_WIDE_INT tmp = ct;
9557 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9558 ix86_compare_op1, VOIDmode, 0, -1);
/* Combine the 0/-1 mask with ct/cf using add / or / not+add /
   not+and+add, depending on the constant pair.  */
9571 tmp = expand_simple_binop (mode, PLUS,
9573 copy_rtx (tmp), 1, OPTAB_DIRECT);
9584 tmp = expand_simple_binop (mode, IOR,
9586 copy_rtx (tmp), 1, OPTAB_DIRECT);
9588 else if (diff == -1 && ct)
9598 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9600 tmp = expand_simple_binop (mode, PLUS,
9601 copy_rtx (tmp), GEN_INT (cf),
9602 copy_rtx (tmp), 1, OPTAB_DIRECT);
9610 * andl cf - ct, dest
9620 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9623 tmp = expand_simple_binop (mode, AND,
9625 gen_int_mode (cf - ct, mode),
9626 copy_rtx (tmp), 1, OPTAB_DIRECT);
9628 tmp = expand_simple_binop (mode, PLUS,
9629 copy_rtx (tmp), GEN_INT (ct),
9630 copy_rtx (tmp), 1, OPTAB_DIRECT);
9633 if (!rtx_equal_p (tmp, out))
9634 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9636 return 1; /* DONE */
/* diff < 0: swap the constants and reverse the condition.  */
9642 tmp = ct, ct = cf, cf = tmp;
9644 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9646 /* We may be reversing unordered compare to normal compare, that
9647 is not valid in general (we may convert non-trapping condition
9648 to trapping one), however on i386 we currently emit all
9649 comparisons unordered. */
9650 compare_code = reverse_condition_maybe_unordered (compare_code);
9651 code = reverse_condition_maybe_unordered (code);
9655 compare_code = reverse_condition (compare_code);
9656 code = reverse_condition (code);
9661 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9662 && GET_CODE (ix86_compare_op1) == CONST_INT)
9664 if (ix86_compare_op1 == const0_rtx
9665 && (code == LT || code == GE))
9666 compare_code = code;
9667 else if (ix86_compare_op1 == constm1_rtx)
9671 else if (code == GT)
9676 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9677 if (compare_code != NIL
9678 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9679 && (cf == -1 || ct == -1))
9681 /* If lea code below could be used, only optimize
9682 if it results in a 2 insn sequence. */
9684 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9685 || diff == 3 || diff == 5 || diff == 9)
9686 || (compare_code == LT && ct == -1)
9687 || (compare_code == GE && cf == -1))
9690 * notl op1 (if necessary)
9698 code = reverse_condition (code);
9701 out = emit_store_flag (out, code, ix86_compare_op0,
9702 ix86_compare_op1, VOIDmode, 0, -1);
9704 out = expand_simple_binop (mode, IOR,
9706 out, 1, OPTAB_DIRECT);
9707 if (out != operands[0])
9708 emit_move_insn (operands[0], out);
9710 return 1; /* DONE */
/* lea path: diff in {1,2,3,4,5,8,9} can be encoded as
   lea cf(dest*diff'), dest after a 0/1 setcc.  */
9715 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9716 || diff == 3 || diff == 5 || diff == 9)
9717 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9718 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
9724 * lea cf(dest*(ct-cf)),dest
9728 * This also catches the degenerate setcc-only case.
9734 out = emit_store_flag (out, code, ix86_compare_op0,
9735 ix86_compare_op1, VOIDmode, 0, 1);
9738 /* On x86_64 the lea instruction operates on Pmode, so we need
9739 to get arithmetics done in proper mode to match. */
9741 tmp = copy_rtx (out);
9745 out1 = copy_rtx (out);
9746 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9750 tmp = gen_rtx_PLUS (mode, tmp, out1);
9756 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9759 if (!rtx_equal_p (tmp, out))
9762 out = force_operand (tmp, copy_rtx (out));
9764 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
9766 if (!rtx_equal_p (out, operands[0]))
9767 emit_move_insn (operands[0], copy_rtx (out));
9769 return 1; /* DONE */
9773 * General case: Jumpful:
9774 * xorl dest,dest cmpl op1, op2
9775 * cmpl op1, op2 movl ct, dest
9777 * decl dest movl cf, dest
9778 * andl (cf-ct),dest 1:
9783 * This is reasonably steep, but branch mispredict costs are
9784 * high on modern cpus, so consider failing only if optimizing
9788 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9789 && BRANCH_COST >= 2)
9795 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9796 /* We may be reversing unordered compare to normal compare,
9797 that is not valid in general (we may convert non-trapping
9798 condition to trapping one), however on i386 we currently
9799 emit all comparisons unordered. */
9800 code = reverse_condition_maybe_unordered (code);
9803 code = reverse_condition (code);
9804 if (compare_code != NIL)
9805 compare_code = reverse_condition (compare_code);
9809 if (compare_code != NIL)
9811 /* notl op1 (if needed)
9816 For x < 0 (resp. x <= -1) there will be no notl,
9817 so if possible swap the constants to get rid of the
9819 True/false will be -1/0 while code below (store flag
9820 followed by decrement) is 0/-1, so the constants need
9821 to be exchanged once more. */
9823 if (compare_code == GE || !cf)
9825 code = reverse_condition (code);
9830 HOST_WIDE_INT tmp = cf;
9835 out = emit_store_flag (out, code, ix86_compare_op0,
9836 ix86_compare_op1, VOIDmode, 0, -1);
9840 out = emit_store_flag (out, code, ix86_compare_op0,
9841 ix86_compare_op1, VOIDmode, 0, 1);
/* setcc (0/1) -> decrement to 0/-1 -> and with (cf-ct) -> add ct.  */
9843 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
9844 copy_rtx (out), 1, OPTAB_DIRECT);
9847 out = expand_simple_binop (mode, AND, copy_rtx (out),
9848 gen_int_mode (cf - ct, mode),
9849 copy_rtx (out), 1, OPTAB_DIRECT);
9851 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
9852 copy_rtx (out), 1, OPTAB_DIRECT);
9853 if (!rtx_equal_p (out, operands[0]))
9854 emit_move_insn (operands[0], copy_rtx (out));
9856 return 1; /* DONE */
9860 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9862 /* Try a few things more with specific constants and a variable. */
9865 rtx var, orig_out, out, tmp;
9867 if (BRANCH_COST <= 2)
9868 return 0; /* FAIL */
9870 /* If one of the two operands is an interesting constant, load a
9871 constant with the above and mask it in with a logical operation. */
9873 if (GET_CODE (operands[2]) == CONST_INT)
9876 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
9877 operands[3] = constm1_rtx, op = and_optab;
9878 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
9879 operands[3] = const0_rtx, op = ior_optab;
9881 return 0; /* FAIL */
9883 else if (GET_CODE (operands[3]) == CONST_INT)
9886 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
9887 operands[2] = constm1_rtx, op = and_optab;
9888 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
9889 operands[2] = const0_rtx, op = ior_optab;
9891 return 0; /* FAIL */
9894 return 0; /* FAIL */
9896 orig_out = operands[0];
9897 tmp = gen_reg_rtx (mode);
9900 /* Recurse to get the constant loaded. */
9901 if (ix86_expand_int_movcc (operands) == 0)
9902 return 0; /* FAIL */
9904 /* Mask in the interesting variable. */
9905 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9907 if (!rtx_equal_p (out, orig_out))
9908 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
9910 return 1; /* DONE */
/* cmov path: force the arms into registers / nonimmediates, copy
   away from the destination on overlap, then emit one or more
   IF_THEN_ELSE sets (extra ones for second/bypass tests).  */
9914 * For comparison with above,
9924 if (! nonimmediate_operand (operands[2], mode))
9925 operands[2] = force_reg (mode, operands[2]);
9926 if (! nonimmediate_operand (operands[3], mode))
9927 operands[3] = force_reg (mode, operands[3]);
9929 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9931 rtx tmp = gen_reg_rtx (mode);
9932 emit_move_insn (tmp, operands[3]);
9935 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9937 rtx tmp = gen_reg_rtx (mode);
9938 emit_move_insn (tmp, operands[2]);
9942 if (! register_operand (operands[2], VOIDmode)
9944 || ! register_operand (operands[3], VOIDmode)))
9945 operands[2] = force_reg (mode, operands[2]);
9948 && ! register_operand (operands[3], VOIDmode))
9949 operands[3] = force_reg (mode, operands[3]);
9951 emit_insn (compare_seq);
9952 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9953 gen_rtx_IF_THEN_ELSE (mode,
9954 compare_op, operands[2],
9957 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9958 gen_rtx_IF_THEN_ELSE (mode,
9960 copy_rtx (operands[3]),
9961 copy_rtx (operands[0]))));
9963 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9964 gen_rtx_IF_THEN_ELSE (mode,
9966 copy_rtx (operands[2]),
9967 copy_rtx (operands[0]))));
9969 return 1; /* DONE */
/* Expand a floating-point conditional move.  First tries SSE
   min/max (minsf3/maxsf3 etc.) when the compare and the arms match
   up, then the SSE cmov patterns, and finally the x87 fcmov path via
   ix86_expand_compare.  Returns DONE/FAIL like the other expanders.
   NOTE(review): elided excerpt — some lines (locals, braces, returns)
   are not visible in this view.  */
9973 ix86_expand_fp_movcc (operands)
9978 rtx compare_op, second_test, bypass_test;
9980 /* For SF/DFmode conditional moves based on comparisons
9981 in same mode, we may want to use SSE min/max instructions. */
9982 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9983 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
9984 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
9985 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
9987 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
9988 /* We may be called from the post-reload splitter. */
9989 && (!REG_P (operands[0])
9990 || SSE_REG_P (operands[0])
9991 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
9993 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9994 code = GET_CODE (operands[1]);
9996 /* See if we have (cross) match between comparison operands and
9997 conditional move operands. */
9998 if (rtx_equal_p (operands[2], op1))
10003 code = reverse_condition_maybe_unordered (code);
10005 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
10007 /* Check for min operation. */
10008 if (code == LT || code == UNLE)
10016 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10017 if (memory_operand (op0, VOIDmode))
10018 op0 = force_reg (GET_MODE (operands[0]), op0);
10019 if (GET_MODE (operands[0]) == SFmode)
10020 emit_insn (gen_minsf3 (operands[0], op0, op1));
10022 emit_insn (gen_mindf3 (operands[0], op0, op1));
10025 /* Check for max operation. */
10026 if (code == GT || code == UNGE)
10034 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10035 if (memory_operand (op0, VOIDmode))
10036 op0 = force_reg (GET_MODE (operands[0]), op0);
10037 if (GET_MODE (operands[0]) == SFmode)
10038 emit_insn (gen_maxsf3 (operands[0], op0, op1));
10040 emit_insn (gen_maxdf3 (operands[0], op0, op1));
10044 /* Manage condition to be sse_comparison_operator. In case we are
10045 in non-ieee mode, try to canonicalize the destination operand
10046 to be first in the comparison - this helps reload to avoid extra
10048 if (!sse_comparison_operator (operands[1], VOIDmode)
10049 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
10051 rtx tmp = ix86_compare_op0;
10052 ix86_compare_op0 = ix86_compare_op1;
10053 ix86_compare_op1 = tmp;
10054 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
10055 VOIDmode, ix86_compare_op0,
10058 /* Similarly try to manage result to be first operand of conditional
10059 move. We also don't support the NE comparison on SSE, so try to
10061 if ((rtx_equal_p (operands[0], operands[3])
10062 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
10063 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
10065 rtx tmp = operands[2];
10066 operands[2] = operands[3];
10068 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10069 (GET_CODE (operands[1])),
10070 VOIDmode, ix86_compare_op0,
10073 if (GET_MODE (operands[0]) == SFmode)
10074 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
10075 operands[2], operands[3],
10076 ix86_compare_op0, ix86_compare_op1));
10078 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
10079 operands[2], operands[3],
10080 ix86_compare_op0, ix86_compare_op1));
10084 /* The floating point conditional move instructions don't directly
10085 support conditions resulting from a signed integer comparison. */
10087 code = GET_CODE (operands[1]);
10088 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10090 /* The floating point conditional move instructions don't directly
10091 support signed integer comparisons. */
10093 if (!fcmov_comparison_operator (compare_op, VOIDmode))
/* Fold an unsupported compare through a QImode setcc, then compare
   that against zero — a form fcmov can consume.  */
10095 if (second_test != NULL || bypass_test != NULL)
10097 tmp = gen_reg_rtx (QImode);
10098 ix86_expand_setcc (code, tmp);
10100 ix86_compare_op0 = tmp;
10101 ix86_compare_op1 = const0_rtx;
10102 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10104 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10106 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10107 emit_move_insn (tmp, operands[3]);
10110 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10112 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10113 emit_move_insn (tmp, operands[2]);
10117 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10118 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10123 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10124 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10129 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10130 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10138 /* Expand conditional increment or decrement using adb/sbb instructions.
10139 The default case using setcc followed by the conditional move can be
10140 done by generic code. */
10142 ix86_expand_int_addcc (operands)
/* operands[0] = operands[2] +/- (condition ? 1 : 0), where operands[3]
   must be const1_rtx or constm1_rtx.  The condition is reduced to a
   carry-flag test via ix86_expand_carry_flag_compare, then a single
   adc or sbb per machine mode does the conditional add.  */
10145 enum rtx_code code = GET_CODE (operands[1]);
10147 rtx val = const0_rtx;
10148 bool fpcmp = false;
10149 enum machine_mode mode = GET_MODE (operands[0]);
/* Only +1 / -1 adjustments can be expressed with adc/sbb of 0.  */
10151 if (operands[3] != const1_rtx
10152 && operands[3] != constm1_rtx)
10154 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10155 ix86_compare_op1, &compare_op))
10157 code = GET_CODE (compare_op);
10159 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10160 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10163 code = ix86_fp_compare_code_to_integer (code);
10170 PUT_CODE (compare_op,
10171 reverse_condition_maybe_unordered
10172 (GET_CODE (compare_op)));
10174 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10176 PUT_MODE (compare_op, mode);
10178 /* Construct either adc or sbb insn. */
10179 if ((code == LTU) == (operands[3] == constm1_rtx))
10181 switch (GET_MODE (operands[0]))
10184 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10187 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10190 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10193 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10201 switch (GET_MODE (operands[0]))
10204 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10207 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10210 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10213 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10219 return 1; /* DONE */
10223 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10224 works for floating pointer parameters and nonoffsetable memories.
10225 For pushes, it returns just stack offsets; the values will be saved
10226 in the right order. Maximally three parts are generated. */
10229 ix86_split_to_parts (operand, parts, mode)
/* Fills PARTS[0..size-1] with word-sized pieces of OPERAND.  On
   32-bit targets pieces are SImode (2 or 3 of them); on 64-bit
   targets DImode plus an SImode tail for XFmode/TFmode.  CONST_DOUBLE
   operands are decomposed via REAL_VALUE_TO_TARGET_*.
   NOTE(review): elided excerpt — the `return size;` and some braces
   are not visible here.  */
10232 enum machine_mode mode;
10237 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
10239 size = (GET_MODE_SIZE (mode) + 4) / 8;
10241 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10243 if (size < 2 || size > 3)
10246 /* Optimize constant pool reference to immediates. This is used by fp
10247 moves, that force all constants to memory to allow combining. */
10248 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10250 rtx tmp = maybe_get_pool_constant (operand);
10255 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10257 /* The only non-offsetable memories we handle are pushes. */
10258 if (! push_operand (operand, VOIDmode))
10261 operand = copy_rtx (operand);
10262 PUT_MODE (operand, Pmode);
10263 parts[0] = parts[1] = parts[2] = operand;
10265 else if (!TARGET_64BIT)
10267 if (mode == DImode)
10268 split_di (&operand, 1, &parts[0], &parts[1]);
10271 if (REG_P (operand))
10273 if (!reload_completed)
10275 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10276 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10278 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10280 else if (offsettable_memref_p (operand))
10282 operand = adjust_address (operand, SImode, 0);
10283 parts[0] = operand;
10284 parts[1] = adjust_address (operand, SImode, 4);
10286 parts[2] = adjust_address (operand, SImode, 8);
10288 else if (GET_CODE (operand) == CONST_DOUBLE)
10293 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10298 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10299 parts[2] = gen_int_mode (l[2], SImode);
10302 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10307 parts[1] = gen_int_mode (l[1], SImode);
10308 parts[0] = gen_int_mode (l[0], SImode);
/* 64-bit target: DImode-sized parts, SImode tail for long double.  */
10316 if (mode == TImode)
10317 split_ti (&operand, 1, &parts[0], &parts[1]);
10318 if (mode == XFmode || mode == TFmode)
10320 if (REG_P (operand))
10322 if (!reload_completed)
10324 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10325 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10327 else if (offsettable_memref_p (operand))
10329 operand = adjust_address (operand, DImode, 0);
10330 parts[0] = operand;
10331 parts[1] = adjust_address (operand, SImode, 8);
10333 else if (GET_CODE (operand) == CONST_DOUBLE)
10338 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10339 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10340 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10341 if (HOST_BITS_PER_WIDE_INT >= 64)
10344 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10345 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10348 parts[0] = immed_double_const (l[0], l[1], DImode);
10349 parts[1] = gen_int_mode (l[2], SImode);
10359 /* Emit insns to perform a move or push of DI, DF, and XF values.
10360 Return false when normal moves are needed; true when all required
10361 insns have been emitted. Operands 2-4 contain the input values
10362 int the correct order; operands 5-7 contain the output values. */
10365 ix86_split_long_move (operands)
/* Splits a multi-word move into per-part moves via
   ix86_split_to_parts, handling pushes, source/destination overlap
   (by reordering or an lea of the source address), and the 64-bit
   single-move shortcut.  NOTE(review): elided excerpt — several
   braces, else-arms and the push-emission loop are not visible.  */
10371 int collisions = 0;
10372 enum machine_mode mode = GET_MODE (operands[0]);
10374 /* The DFmode expanders may ask us to move double.
10375 For 64bit target this is single move. By hiding the fact
10376 here we simplify i386.md splitters. */
10377 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10379 /* Optimize constant pool reference to immediates. This is used by
10380 fp moves, that force all constants to memory to allow combining. */
10382 if (GET_CODE (operands[1]) == MEM
10383 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10384 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10385 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10386 if (push_operand (operands[0], VOIDmode))
10388 operands[0] = copy_rtx (operands[0]);
10389 PUT_MODE (operands[0], Pmode);
10392 operands[0] = gen_lowpart (DImode, operands[0]);
10393 operands[1] = gen_lowpart (DImode, operands[1]);
10394 emit_move_insn (operands[0], operands[1]);
10398 /* The only non-offsettable memory we handle is push. */
10399 if (push_operand (operands[0], VOIDmode))
10401 else if (GET_CODE (operands[0]) == MEM
10402 && ! offsettable_memref_p (operands[0]))
10405 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10406 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10408 /* When emitting push, take care for source operands on the stack. */
10409 if (push && GET_CODE (operands[1]) == MEM
10410 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10413 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10414 XEXP (part[1][2], 0));
10415 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10416 XEXP (part[1][1], 0));
10419 /* We need to do copy in the right order in case an address register
10420 of the source overlaps the destination. */
10421 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10423 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10425 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10428 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10431 /* Collision in the middle part can be handled by reordering. */
10432 if (collisions == 1 && nparts == 3
10433 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10436 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10437 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10440 /* If there are more collisions, we can't handle it by reordering.
10441 Do an lea to the last part and use only one colliding move. */
10442 else if (collisions > 1)
10445 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
10446 XEXP (part[1][0], 0)));
10447 part[1][0] = change_address (part[1][0],
10448 TARGET_64BIT ? DImode : SImode,
10449 part[0][nparts - 1]);
10450 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
10452 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
10462 /* We use only first 12 bytes of TFmode value, but for pushing we
10463 are required to adjust stack as if we were pushing real 16byte
10465 if (mode == TFmode && !TARGET_64BIT)
10466 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
10468 emit_move_insn (part[0][2], part[1][2]);
10473 /* In 64bit mode we don't have 32bit push available. In case this is
10474 register, it is OK - we will just use larger counterpart. We also
10475 retype memory - these comes from attempt to avoid REX prefix on
10476 moving of second half of TFmode value. */
10477 if (GET_MODE (part[1][1]) == SImode)
10479 if (GET_CODE (part[1][1]) == MEM)
10480 part[1][1] = adjust_address (part[1][1], DImode, 0);
10481 else if (REG_P (part[1][1]))
10482 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10485 if (GET_MODE (part[1][0]) == SImode)
10486 part[1][0] = part[1][1];
10489 emit_move_insn (part[0][1], part[1][1]);
10490 emit_move_insn (part[0][0], part[1][0]);
10494 /* Choose correct order to not overwrite the source before it is copied. */
10495 if ((REG_P (part[0][0])
10496 && REG_P (part[1][1])
10497 && (REGNO (part[0][0]) == REGNO (part[1][1])
10499 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10501 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
/* High-to-low copy order (dest low word aliases a source word).  */
10505 operands[2] = part[0][2];
10506 operands[3] = part[0][1];
10507 operands[4] = part[0][0];
10508 operands[5] = part[1][2];
10509 operands[6] = part[1][1];
10510 operands[7] = part[1][0];
10514 operands[2] = part[0][1];
10515 operands[3] = part[0][0];
10516 operands[5] = part[1][1];
10517 operands[6] = part[1][0];
/* Low-to-high copy order (no aliasing detected).  */
10524 operands[2] = part[0][0];
10525 operands[3] = part[0][1];
10526 operands[4] = part[0][2];
10527 operands[5] = part[1][0];
10528 operands[6] = part[1][1];
10529 operands[7] = part[1][2];
10533 operands[2] = part[0][0];
10534 operands[3] = part[0][1];
10535 operands[5] = part[1][0];
10536 operands[6] = part[1][1];
10539 emit_move_insn (operands[2], operands[5]);
10540 emit_move_insn (operands[3], operands[6]);
10542 emit_move_insn (operands[4], operands[7]);
/* Split a DImode left shift into SImode operations.  Constant counts
   >= 32 move the low word into the high word and zero the low word;
   smaller constants use shld + shl.  Variable counts emit shld/shl
   followed by an adjustment (cmov-based when available, using
   SCRATCH or a fresh zero register) for counts >= 32.  */
10548 ix86_split_ashldi (operands, scratch)
10549 rtx *operands, scratch;
10551 rtx low[2], high[2];
10554 if (GET_CODE (operands[2]) == CONST_INT)
10556 split_di (operands, 2, low, high);
/* Only the low 6 bits of the count are significant.  */
10557 count = INTVAL (operands[2]) & 63;
10561 emit_move_insn (high[0], low[1]);
10562 emit_move_insn (low[0], const0_rtx);
10565 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10569 if (!rtx_equal_p (operands[0], operands[1]))
10570 emit_move_insn (operands[0], operands[1]);
10571 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10572 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
/* Variable shift count.  */
10577 if (!rtx_equal_p (operands[0], operands[1]))
10578 emit_move_insn (operands[0], operands[1]);
10580 split_di (operands, 1, low, high);
10582 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10583 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10585 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10587 if (! no_new_pseudos)
10588 scratch = force_reg (SImode, const0_rtx);
10590 emit_move_insn (scratch, const0_rtx);
10592 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10596 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
/* Split a DImode arithmetic right shift into SImode operations.
   Constant counts >= 32 copy the high word down and sign-fill the
   high word with an ashrsi by 31; smaller constants use shrd + sar.
   Variable counts use shrd/sar plus a cmov adjustment that needs the
   sign word in SCRATCH (or a fresh pseudo).  */
10601 ix86_split_ashrdi (operands, scratch)
10602 rtx *operands, scratch;
10604 rtx low[2], high[2];
10607 if (GET_CODE (operands[2]) == CONST_INT)
10609 split_di (operands, 2, low, high);
/* Only the low 6 bits of the count are significant.  */
10610 count = INTVAL (operands[2]) & 63;
10614 emit_move_insn (low[0], high[1]);
10616 if (! reload_completed)
10617 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10620 emit_move_insn (high[0], low[0]);
10621 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10625 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10629 if (!rtx_equal_p (operands[0], operands[1]))
10630 emit_move_insn (operands[0], operands[1]);
10631 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10632 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
/* Variable shift count.  */
10637 if (!rtx_equal_p (operands[0], operands[1]))
10638 emit_move_insn (operands[0], operands[1]);
10640 split_di (operands, 1, low, high);
10642 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10643 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10645 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10647 if (! no_new_pseudos)
10648 scratch = gen_reg_rtx (SImode);
/* SCRATCH holds the sign extension (high >> 31) for the fixup.  */
10649 emit_move_insn (scratch, high[0]);
10650 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10651 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10655 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
/* Split a DImode logical right shift into SImode operations.
   Mirrors ix86_split_ashldi with the words reversed: constant counts
   >= 32 copy the high word down and zero the high word; smaller
   constants use shrd + shr; variable counts add a cmov-based
   adjustment with a zero scratch register.  */
10660 ix86_split_lshrdi (operands, scratch)
10661 rtx *operands, scratch;
10663 rtx low[2], high[2];
10666 if (GET_CODE (operands[2]) == CONST_INT)
10668 split_di (operands, 2, low, high);
/* Only the low 6 bits of the count are significant.  */
10669 count = INTVAL (operands[2]) & 63;
10673 emit_move_insn (low[0], high[1]);
10674 emit_move_insn (high[0], const0_rtx);
10677 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10681 if (!rtx_equal_p (operands[0], operands[1]))
10682 emit_move_insn (operands[0], operands[1]);
10683 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10684 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
/* Variable shift count.  */
10689 if (!rtx_equal_p (operands[0], operands[1]))
10690 emit_move_insn (operands[0], operands[1]);
10692 split_di (operands, 1, low, high);
10694 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10695 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10697 /* Heh. By reversing the arguments, we can reuse this pattern. */
10698 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10700 if (! no_new_pseudos)
10701 scratch = force_reg (SImode, const0_rtx);
10703 emit_move_insn (scratch, const0_rtx);
10705 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10709 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10713 /* Helper function for the string operations below. Test VARIABLE whether
10714 it is aligned to VALUE bytes. If true, jump to the label. */
/* Returns the label rtx to be emitted by the caller; the jump is taken
   when (VARIABLE & VALUE) == 0.  */
10716 ix86_expand_aligntest (variable, value)
10720 rtx label = gen_label_rtx ();
10721 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
/* Use the AND insn matching the width of VARIABLE (DImode on x86-64
   pointers, SImode otherwise).  */
10722 if (GET_MODE (variable) == DImode)
10723 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10725 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10726 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10731 /* Adjust COUNTER by the VALUE. */
/* Subtracts VALUE from COUNTREG in place (emits an add of -VALUE),
   choosing the add insn that matches the register's mode.  */
10733 ix86_adjust_counter (countreg, value)
10735 HOST_WIDE_INT value;
10737 if (GET_MODE (countreg) == DImode)
10738 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10740 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10743 /* Zero extend possibly SImode EXP to Pmode register. */
/* Returns a fresh Pmode register holding EXP.  VOIDmode constants are
   simply forced into a register; values already in Pmode are copied;
   an SImode value is zero-extended (only reachable when Pmode is
   DImode, i.e. 64-bit).  */
10745 ix86_zero_extend_to_Pmode (exp)
10749 if (GET_MODE (exp) == VOIDmode)
10750 return force_reg (Pmode, exp);
10751 if (GET_MODE (exp) == Pmode)
10752 return copy_to_mode_reg (Pmode, exp);
10753 r = gen_reg_rtx (Pmode);
10754 emit_insn (gen_zero_extendsidi2 (r, exp));
10758 /* Expand string move (memcpy) operation. Use i386 string operations when
10759 profitable. expand_clrstr contains similar code. */
/* DST/SRC are MEMs; COUNT_EXP is the byte count; ALIGN_EXP the known
   alignment.  Returns nonzero on success, zero to fall back to the
   library call (return statements elided in this extract).
   NOTE(review): many structural lines are missing from this extract;
   code below is kept verbatim.  */
10761 ix86_expand_movstr (dst, src, count_exp, align_exp)
10762 rtx dst, src, count_exp, align_exp;
10764 rtx srcreg, destreg, countreg;
10765 enum machine_mode counter_mode;
10766 HOST_WIDE_INT align = 0;
10767 unsigned HOST_WIDE_INT count = 0;
10771 if (GET_CODE (align_exp) == CONST_INT)
10772 align = INTVAL (align_exp);
10774 /* This simple hack avoids all inlining code and simplifies code below. */
10775 if (!TARGET_ALIGN_STRINGOPS)
10778 if (GET_CODE (count_exp) == CONST_INT)
10780 count = INTVAL (count_exp);
10781 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
10785 /* Figure out proper mode for counter. For 32bits it is always SImode,
10786 for 64bits use SImode when possible, otherwise DImode.
10787 Set count to number of bytes copied when known at compile time. */
10788 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10789 || x86_64_zero_extended_value (count_exp))
10790 counter_mode = SImode;
10792 counter_mode = DImode;
10796 if (counter_mode != SImode && counter_mode != DImode)
10799 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10800 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
/* String ops require the direction flag to be clear.  */
10802 emit_insn (gen_cld ());
10804 /* When optimizing for size emit simple rep ; movsb instruction for
10805 counts not divisible by 4. */
10807 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10809 countreg = ix86_zero_extend_to_Pmode (count_exp);
10811 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
10812 destreg, srcreg, countreg));
10814 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
10815 destreg, srcreg, countreg));
10818 /* For constant aligned (or small unaligned) copies use rep movsl
10819 followed by code copying the rest. For PentiumPro ensure 8 byte
10820 alignment to allow rep movsl acceleration. */
10822 else if (count != 0
10824 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10825 || optimize_size || count < (unsigned int) 64))
/* Word size for the bulk copy: 8 bytes on 64-bit (unless -Os), else 4.  */
10827 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10828 if (count & ~(size - 1))
/* The 0x3fffffff mask keeps the 32-bit word count representable.  */
10830 countreg = copy_to_mode_reg (counter_mode,
10831 GEN_INT ((count >> (size == 4 ? 2 : 3))
10832 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10833 countreg = ix86_zero_extend_to_Pmode (countreg);
10837 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
10838 destreg, srcreg, countreg));
10840 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
10841 destreg, srcreg, countreg));
10844 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
10845 destreg, srcreg, countreg));
/* Copy the remaining tail (4/2/1 bytes) with single string moves.  */
10847 if (size == 8 && (count & 0x04))
10848 emit_insn (gen_strmovsi (destreg, srcreg));
10850 emit_insn (gen_strmovhi (destreg, srcreg));
10852 emit_insn (gen_strmovqi (destreg, srcreg));
10854 /* The generic code based on the glibc implementation:
10855 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10856 allowing accelerated copying there)
10857 - copy the data using rep movsl
10858 - copy the rest. */
10863 int desired_alignment = (TARGET_PENTIUMPRO
10864 && (count == 0 || count >= (unsigned int) 260)
10865 ? 8 : UNITS_PER_WORD);
10867 /* In case we don't know anything about the alignment, default to
10868 library version, since it is usually equally fast and result in
10871 Also emit call when we know that the count is large and call overhead
10872 will not be important. */
10873 if (!TARGET_INLINE_ALL_STRINGOPS
10874 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
10880 if (TARGET_SINGLE_STRINGOP)
10881 emit_insn (gen_cld ());
10883 countreg2 = gen_reg_rtx (Pmode);
10884 countreg = copy_to_mode_reg (counter_mode, count_exp);
10886 /* We don't use loops to align destination and to copy parts smaller
10887 than 4 bytes, because gcc is able to optimize such code better (in
10888 the case the destination or the count really is aligned, gcc is often
10889 able to predict the branches) and also it is friendlier to the
10890 hardware branch prediction.
10892 Using loops is beneficial for generic case, because we can
10893 handle small counts using the loops. Many CPUs (such as Athlon)
10894 have large REP prefix setup costs.
10896 This is quite costly. Maybe we can revisit this decision later or
10897 add some customizability to this code. */
/* Skip the alignment prologue entirely when the count may be smaller
   than the desired alignment.  */
10899 if (count == 0 && align < desired_alignment)
10901 label = gen_label_rtx ();
10902 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10903 LEU, 0, counter_mode, 1, label);
/* Align the destination one byte / two bytes / four bytes at a time,
   decrementing the count accordingly.  */
10907 rtx label = ix86_expand_aligntest (destreg, 1);
10908 emit_insn (gen_strmovqi (destreg, srcreg));
10909 ix86_adjust_counter (countreg, 1);
10910 emit_label (label);
10911 LABEL_NUSES (label) = 1;
10915 rtx label = ix86_expand_aligntest (destreg, 2);
10916 emit_insn (gen_strmovhi (destreg, srcreg));
10917 ix86_adjust_counter (countreg, 2);
10918 emit_label (label);
10919 LABEL_NUSES (label) = 1;
10921 if (align <= 4 && desired_alignment > 4)
10923 rtx label = ix86_expand_aligntest (destreg, 4);
10924 emit_insn (gen_strmovsi (destreg, srcreg));
10925 ix86_adjust_counter (countreg, 4);
10926 emit_label (label);
10927 LABEL_NUSES (label) = 1;
10930 if (label && desired_alignment > 4 && !TARGET_64BIT)
10932 emit_label (label);
10933 LABEL_NUSES (label) = 1;
10936 if (!TARGET_SINGLE_STRINGOP)
10937 emit_insn (gen_cld ());
/* Bulk copy: shift the byte count down to a word count, then rep movs.  */
10940 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10942 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
10943 destreg, srcreg, countreg2));
10947 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10948 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
10949 destreg, srcreg, countreg2));
10954 emit_label (label);
10955 LABEL_NUSES (label) = 1;
/* Copy the tail, testing the remaining count bits 4/2/1 at run time
   when the count is not a compile-time constant.  */
10957 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10958 emit_insn (gen_strmovsi (destreg, srcreg));
10959 if ((align <= 4 || count == 0) && TARGET_64BIT)
10961 rtx label = ix86_expand_aligntest (countreg, 4);
10962 emit_insn (gen_strmovsi (destreg, srcreg));
10963 emit_label (label);
10964 LABEL_NUSES (label) = 1;
10966 if (align > 2 && count != 0 && (count & 2))
10967 emit_insn (gen_strmovhi (destreg, srcreg));
10968 if (align <= 2 || count == 0)
10970 rtx label = ix86_expand_aligntest (countreg, 2);
10971 emit_insn (gen_strmovhi (destreg, srcreg));
10972 emit_label (label);
10973 LABEL_NUSES (label) = 1;
10975 if (align > 1 && count != 0 && (count & 1))
10976 emit_insn (gen_strmovqi (destreg, srcreg));
10977 if (align <= 1 || count == 0)
10979 rtx label = ix86_expand_aligntest (countreg, 1);
10980 emit_insn (gen_strmovqi (destreg, srcreg));
10981 emit_label (label);
10982 LABEL_NUSES (label) = 1;
/* Attach memory attributes of DST/SRC to the emitted insns.  */
10986 insns = get_insns ();
10989 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
10994 /* Expand string clear operation (bzero). Use i386 string operations when
10995 profitable. expand_movstr contains similar code. */
/* SRC is the destination MEM (named src historically); COUNT_EXP the
   byte count; ALIGN_EXP the known alignment.  Mirrors
   ix86_expand_movstr but stores zero instead of copying.
   NOTE(review): structural lines elided in this extract; code kept
   verbatim.  */
10997 ix86_expand_clrstr (src, count_exp, align_exp)
10998 rtx src, count_exp, align_exp;
11000 rtx destreg, zeroreg, countreg;
11001 enum machine_mode counter_mode;
11002 HOST_WIDE_INT align = 0;
11003 unsigned HOST_WIDE_INT count = 0;
11005 if (GET_CODE (align_exp) == CONST_INT)
11006 align = INTVAL (align_exp);
11008 /* This simple hack avoids all inlining code and simplifies code below. */
11009 if (!TARGET_ALIGN_STRINGOPS)
11012 if (GET_CODE (count_exp) == CONST_INT)
11014 count = INTVAL (count_exp);
11015 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11018 /* Figure out proper mode for counter. For 32bits it is always SImode,
11019 for 64bits use SImode when possible, otherwise DImode.
11020 Set count to number of bytes copied when known at compile time. */
11021 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11022 || x86_64_zero_extended_value (count_exp))
11023 counter_mode = SImode;
11025 counter_mode = DImode;
11027 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
/* String ops require the direction flag to be clear.  */
11029 emit_insn (gen_cld ());
11031 /* When optimizing for size emit simple rep ; movsb instruction for
11032 counts not divisible by 4. */
11034 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11036 countreg = ix86_zero_extend_to_Pmode (count_exp);
11037 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11039 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
11040 destreg, countreg));
11042 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
11043 destreg, countreg));
/* Constant aligned (or small unaligned) clears: rep stos in word
   units, then store the tail.  */
11045 else if (count != 0
11047 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11048 || optimize_size || count < (unsigned int) 64)
11050 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11051 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11052 if (count & ~(size - 1))
11054 countreg = copy_to_mode_reg (counter_mode,
11055 GEN_INT ((count >> (size == 4 ? 2 : 3))
11056 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11057 countreg = ix86_zero_extend_to_Pmode (countreg);
11061 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
11062 destreg, countreg));
11064 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
11065 destreg, countreg));
11068 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
11069 destreg, countreg));
/* Store the remaining tail (4/2/1 bytes) using narrow subregs of the
   zero register.  */
11071 if (size == 8 && (count & 0x04))
11072 emit_insn (gen_strsetsi (destreg,
11073 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11075 emit_insn (gen_strsethi (destreg,
11076 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11078 emit_insn (gen_strsetqi (destreg,
11079 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11085 /* Compute desired alignment of the string operation. */
11086 int desired_alignment = (TARGET_PENTIUMPRO
11087 && (count == 0 || count >= (unsigned int) 260)
11088 ? 8 : UNITS_PER_WORD);
11090 /* In case we don't know anything about the alignment, default to
11091 library version, since it is usually equally fast and result in
11094 Also emit call when we know that the count is large and call overhead
11095 will not be important. */
11096 if (!TARGET_INLINE_ALL_STRINGOPS
11097 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11100 if (TARGET_SINGLE_STRINGOP)
11101 emit_insn (gen_cld ());
11103 countreg2 = gen_reg_rtx (Pmode);
11104 countreg = copy_to_mode_reg (counter_mode, count_exp);
11105 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
/* Skip the alignment prologue when the count may be too small.  */
11107 if (count == 0 && align < desired_alignment)
11109 label = gen_label_rtx ();
11110 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11111 LEU, 0, counter_mode, 1, label);
/* Align the destination 1/2/4 bytes at a time, decrementing the
   count as we go.  */
11115 rtx label = ix86_expand_aligntest (destreg, 1);
11116 emit_insn (gen_strsetqi (destreg,
11117 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11118 ix86_adjust_counter (countreg, 1);
11119 emit_label (label);
11120 LABEL_NUSES (label) = 1;
11124 rtx label = ix86_expand_aligntest (destreg, 2);
11125 emit_insn (gen_strsethi (destreg,
11126 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11127 ix86_adjust_counter (countreg, 2);
11128 emit_label (label);
11129 LABEL_NUSES (label) = 1;
11131 if (align <= 4 && desired_alignment > 4)
11133 rtx label = ix86_expand_aligntest (destreg, 4);
11134 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
11135 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11137 ix86_adjust_counter (countreg, 4);
11138 emit_label (label);
11139 LABEL_NUSES (label) = 1;
11142 if (label && desired_alignment > 4 && !TARGET_64BIT)
11144 emit_label (label);
11145 LABEL_NUSES (label) = 1;
11149 if (!TARGET_SINGLE_STRINGOP)
11150 emit_insn (gen_cld ());
/* Bulk clear: shift the byte count to a word count and rep stos.  */
11153 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11155 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
11156 destreg, countreg2));
11160 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
11161 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
11162 destreg, countreg2));
11166 emit_label (label);
11167 LABEL_NUSES (label) = 1;
/* Clear the tail, testing remaining count bits 4/2/1 at run time when
   the count is not a compile-time constant.  */
11170 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11171 emit_insn (gen_strsetsi (destreg,
11172 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11173 if (TARGET_64BIT && (align <= 4 || count == 0))
11175 rtx label = ix86_expand_aligntest (countreg, 4);
11176 emit_insn (gen_strsetsi (destreg,
11177 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11178 emit_label (label);
11179 LABEL_NUSES (label) = 1;
11181 if (align > 2 && count != 0 && (count & 2))
11182 emit_insn (gen_strsethi (destreg,
11183 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11184 if (align <= 2 || count == 0)
11186 rtx label = ix86_expand_aligntest (countreg, 2);
11187 emit_insn (gen_strsethi (destreg,
11188 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11189 emit_label (label);
11190 LABEL_NUSES (label) = 1;
11192 if (align > 1 && count != 0 && (count & 1))
11193 emit_insn (gen_strsetqi (destreg,
11194 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11195 if (align <= 1 || count == 0)
11197 rtx label = ix86_expand_aligntest (countreg, 1);
11198 emit_insn (gen_strsetqi (destreg,
11199 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11200 emit_label (label);
11201 LABEL_NUSES (label) = 1;
11206 /* Expand strlen. */
/* OUT receives the length; SRC is the string MEM; EOSCHAR the
   terminator; ALIGN the known alignment.  Uses the unrolled SImode
   scan when profitable, otherwise repnz scasb via the strlenqi
   patterns.  NOTE(review): structural lines elided; code kept
   verbatim.  */
11208 ix86_expand_strlen (out, src, eoschar, align)
11209 rtx out, src, eoschar, align;
11211 rtx addr, scratch1, scratch2, scratch3, scratch4;
11213 /* The generic case of strlen expander is long. Avoid it's
11214 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
11216 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11217 && !TARGET_INLINE_ALL_STRINGOPS
11219 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11222 addr = force_reg (Pmode, XEXP (src, 0));
11223 scratch1 = gen_reg_rtx (Pmode);
/* Unrolled path: only for NUL terminator with optimization on.  */
11225 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11228 /* Well it seems that some optimizer does not combine a call like
11229 foo(strlen(bar), strlen(bar));
11230 when the move and the subtraction is done here. It does calculate
11231 the length just once when these instructions are done inside of
11232 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
11233 often used and I use one fewer register for the lifetime of
11234 output_strlen_unroll() this is better. */
11236 emit_move_insn (out, addr);
11238 ix86_expand_strlensi_unroll_1 (out, align);
11240 /* strlensi_unroll_1 returns the address of the zero at the end of
11241 the string, like memchr(), so compute the length by subtracting
11242 the start address. */
11244 emit_insn (gen_subdi3 (out, out, addr));
11246 emit_insn (gen_subsi3 (out, out, addr));
/* repnz scasb path: scan for EOSCHAR, then OUT = ~count - 1.  */
11250 scratch2 = gen_reg_rtx (Pmode);
11251 scratch3 = gen_reg_rtx (Pmode);
/* scasb decrements the count register, which starts at -1.  */
11252 scratch4 = force_reg (Pmode, constm1_rtx);
11254 emit_move_insn (scratch3, addr);
11255 eoschar = force_reg (QImode, eoschar);
11257 emit_insn (gen_cld ());
11260 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
11261 align, scratch4, scratch3));
11262 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11263 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11267 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
11268 align, scratch4, scratch3));
11269 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11270 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11276 /* Expand the appropriate insns for doing strlen if not just doing
11279 out = result, initialized with the start address
11280 align_rtx = alignment of the address.
11281 scratch = scratch register, initialized with the startaddress when
11282 not aligned, otherwise undefined
11284 This is just the body. It needs the initialisations mentioned above and
11285 some address computing at the end. These things are done in i386.md. */
/* On return OUT holds the address of the terminating NUL (memchr
   style); the caller subtracts the start address.
   NOTE(review): structural lines elided in this extract; code kept
   verbatim.  */
11288 ix86_expand_strlensi_unroll_1 (out, align_rtx)
11289 rtx out, align_rtx;
11293 rtx align_2_label = NULL_RTX;
11294 rtx align_3_label = NULL_RTX;
11295 rtx align_4_label = gen_label_rtx ();
11296 rtx end_0_label = gen_label_rtx ();
11298 rtx tmpreg = gen_reg_rtx (SImode);
11299 rtx scratch = gen_reg_rtx (SImode);
11303 if (GET_CODE (align_rtx) == CONST_INT)
11304 align = INTVAL (align_rtx);
11306 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11308 /* Is there a known alignment and is it less than 4? */
11311 rtx scratch1 = gen_reg_rtx (Pmode);
11312 emit_move_insn (scratch1, out);
11313 /* Is there a known alignment and is it not 2? */
11316 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11317 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11319 /* Leave just the 3 lower bits. */
11320 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11321 NULL_RTX, 0, OPTAB_WIDEN);
/* Dispatch on (addr & 3): 0 -> aligned, 2 -> two-byte case,
   3 -> three-byte case, otherwise fall through to one-byte case.  */
11323 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11324 Pmode, 1, align_4_label);
11325 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
11326 Pmode, 1, align_2_label);
11327 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
11328 Pmode, 1, align_3_label);
11332 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11333 check if is aligned to 4 - byte. */
11335 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
11336 NULL_RTX, 0, OPTAB_WIDEN);
11338 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11339 Pmode, 1, align_4_label);
11342 mem = gen_rtx_MEM (QImode, out);
11344 /* Now compare the bytes. */
11346 /* Compare the first n unaligned byte on a byte per byte basis. */
11347 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11348 QImode, 1, end_0_label);
11350 /* Increment the address. */
11352 emit_insn (gen_adddi3 (out, out, const1_rtx));
11354 emit_insn (gen_addsi3 (out, out, const1_rtx));
11356 /* Not needed with an alignment of 2 */
11359 emit_label (align_2_label);
11361 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11365 emit_insn (gen_adddi3 (out, out, const1_rtx));
11367 emit_insn (gen_addsi3 (out, out, const1_rtx));
11369 emit_label (align_3_label);
11372 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11376 emit_insn (gen_adddi3 (out, out, const1_rtx));
11378 emit_insn (gen_addsi3 (out, out, const1_rtx));
11381 /* Generate loop to check 4 bytes at a time. It is not a good idea to
11382 align this loop. It gives only huge programs, but does not help to
11384 emit_label (align_4_label);
11386 mem = gen_rtx_MEM (SImode, out);
11387 emit_move_insn (scratch, mem);
11389 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11391 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11393 /* This formula yields a nonzero result iff one of the bytes is zero.
11394 This saves three branches inside loop and many cycles. */
/* The classic (x - 0x01010101) & ~x & 0x80808080 zero-byte test.  */
11396 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11397 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11398 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11399 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11400 gen_int_mode (0x80808080, SImode)));
11401 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
/* A zero byte was found; locate which of the 4 bytes it is.  The
   cmove variant does this branch-free.  */
11406 rtx reg = gen_reg_rtx (SImode);
11407 rtx reg2 = gen_reg_rtx (Pmode);
11408 emit_move_insn (reg, tmpreg);
11409 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11411 /* If zero is not in the first two bytes, move two bytes forward. */
11412 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11413 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11414 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11415 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11416 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11419 /* Emit lea manually to avoid clobbering of flags. */
11420 emit_insn (gen_rtx_SET (SImode, reg2,
11421 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
11423 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11424 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11425 emit_insn (gen_rtx_SET (VOIDmode, out,
11426 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
/* Non-cmove variant: branch over the two-byte adjustment.  */
11433 rtx end_2_label = gen_label_rtx ();
11434 /* Is zero in the first two bytes? */
11436 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11437 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11438 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11439 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11440 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11442 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11443 JUMP_LABEL (tmp) = end_2_label;
11445 /* Not in the first two. Move two bytes forward. */
11446 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11448 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
11450 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
11452 emit_label (end_2_label);
11456 /* Avoid branch in fixing the byte. */
11457 tmpreg = gen_lowpart (QImode, tmpreg);
11458 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
/* NOTE(review): register 17 here is presumably FLAGS_REG on this
   target -- confirm against the register numbering in i386.h.  */
11459 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11461 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11463 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
11465 emit_label (end_0_label);
/* Emit a call insn.  RETVAL receives the value (may be null), FNADDR
   is a MEM for the callee, CALLARG1/CALLARG2 are target-specific call
   arguments, POP the number of bytes the callee pops, SIBCALL nonzero
   for a sibling call.  NOTE(review): structural lines elided in this
   extract; code kept verbatim.  */
11469 ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop, sibcall)
11470 rtx retval, fnaddr, callarg1, callarg2, pop;
11473 rtx use = NULL, call;
11475 if (pop == const0_rtx)
11477 if (TARGET_64BIT && pop)
/* Darwin-only: route PIC symbol calls through the indirection stub.  */
11481 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11482 fnaddr = machopic_indirect_call_target (fnaddr);
11484 /* Static functions and indirect calls don't need the pic register. */
11485 if (! TARGET_64BIT && flag_pic
11486 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11487 && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
11488 use_reg (&use, pic_offset_table_rtx)
/* x86-64 varargs ABI: AL carries the number of SSE registers used.  */
11490 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11492 rtx al = gen_rtx_REG (QImode, 0);
11493 emit_move_insn (al, callarg2);
11494 use_reg (&use, al);
11496 #endif /* TARGET_MACHO */
11498 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11500 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11501 fnaddr = gen_rtx_MEM (QImode, fnaddr);
/* Sibcalls on x86-64 must go through a call-clobbered register.  */
11503 if (sibcall && TARGET_64BIT
11504 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11507 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11508 fnaddr = gen_rtx_REG (Pmode, 40);
11509 emit_move_insn (fnaddr, addr);
11510 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11513 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11515 call = gen_rtx_SET (VOIDmode, retval, call);
/* Express the callee-pop stack adjustment in the call pattern.  */
11518 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11519 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11520 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11523 call = emit_call_insn (call);
11525 CALL_INSN_FUNCTION_USAGE (call) = use;
11529 /* Clear stack slot assignments remembered from previous functions.
11530 This is called from INIT_EXPANDERS once before RTL is emitted for each
11533 static struct machine_function *
11534 ix86_init_machine_status ()
/* GC-allocated and zero-initialized, so all per-function x86 state
   starts out cleared.  */
11536 return ggc_alloc_cleared (sizeof (struct machine_function));
11539 /* Return a MEM corresponding to a stack slot with mode MODE.
11540 Allocate a new slot if necessary.
11542 The RTL for a function can have several slots available: N is
11543 which slot to use. */
11546 assign_386_stack_local (mode, n)
11547 enum machine_mode mode;
/* Reject out-of-range slot indices (abort elided in this extract).  */
11550 if (n < 0 || n >= MAX_386_STACK_LOCALS)
/* Lazily allocate and cache the slot per (mode, n) pair.  */
11553 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
11554 ix86_stack_locals[(int) mode][n]
11555 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11557 return ix86_stack_locals[(int) mode][n];
11560 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11562 static GTY(()) rtx ix86_tls_symbol;
/* Returns the cached SYMBOL_REF, creating it on first use.  The GNU
   TLS 32-bit variant uses the triple-underscore ___tls_get_addr
   entry point; otherwise the standard __tls_get_addr.  */
11564 ix86_tls_get_addr ()
11567 if (!ix86_tls_symbol)
11569 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11570 (TARGET_GNU_TLS && !TARGET_64BIT)
11571 ? "___tls_get_addr"
11572 : "__tls_get_addr");
11575 return ix86_tls_symbol;
11578 /* Calculate the length of the memory address in the instruction
11579 encoding. Does not include the one-byte modrm, opcode, or prefix. */
/* Returns the number of extra bytes (SIB + displacement) the address
   ADDR contributes to the insn encoding.
   NOTE(review): return statements elided in this extract; code kept
   verbatim.  */
11582 memory_address_length (addr)
11585 struct ix86_address parts;
11586 rtx base, index, disp;
/* Autoincrement addresses take no extra bytes.  */
11589 if (GET_CODE (addr) == PRE_DEC
11590 || GET_CODE (addr) == POST_INC
11591 || GET_CODE (addr) == PRE_MODIFY
11592 || GET_CODE (addr) == POST_MODIFY)
11595 if (! ix86_decompose_address (addr, &parts))
11599 index = parts.index;
11603 /* Register Indirect. */
11604 if (base && !index && !disp)
11606 /* Special cases: ebp and esp need the two-byte modrm form. */
11607 if (addr == stack_pointer_rtx
11608 || addr == arg_pointer_rtx
11609 || addr == frame_pointer_rtx
11610 || addr == hard_frame_pointer_rtx)
11614 /* Direct Addressing. */
11615 else if (disp && !base && !index)
11620 /* Find the length of the displacement constant. */
/* An 8-bit signed displacement ('K' constraint) takes 1 byte,
   otherwise a full 4-byte displacement is needed.  */
11623 if (GET_CODE (disp) == CONST_INT
11624 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
11630 /* An index requires the two-byte modrm form. */
11638 /* Compute default value for "length_immediate" attribute. When SHORTFORM
11639 is set, expect that insn have 8bit immediate alternative. */
/* Scans the (cached) extracted operands for constants and returns the
   immediate's encoded size based on the insn's mode attribute.  */
11641 ix86_attr_length_immediate_default (insn, shortform)
11647 extract_insn_cached (insn);
11648 for (i = recog_data.n_operands - 1; i >= 0; --i)
11649 if (CONSTANT_P (recog_data.operand[i]))
/* With SHORTFORM, a constant fitting the 'K' (signed 8-bit)
   constraint encodes as a single byte.  */
11654 && GET_CODE (recog_data.operand[i]) == CONST_INT
11655 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11659 switch (get_attr_mode (insn))
11670 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
11675 fatal_insn ("unknown insn mode", insn);
11681 /* Compute default value for "length_address" attribute. */
/* Returns the address-encoding length of the first MEM operand found,
   or (elided here) zero when the insn has no memory operand.  */
11683 ix86_attr_length_address_default (insn)
11687 extract_insn_cached (insn);
11688 for (i = recog_data.n_operands - 1; i >= 0; --i)
11689 if (GET_CODE (recog_data.operand[i]) == MEM)
11691 return memory_address_length (XEXP (recog_data.operand[i], 0));
11697 /* Return the maximum number of instructions a cpu can issue. */
/* NOTE(review): the function header and return values are missing from
   this extract -- presumably this is ix86_issue_rate switching on
   ix86_cpu; only the case labels survive.  Confirm against the full
   file.  */
11704 case PROCESSOR_PENTIUM:
11708 case PROCESSOR_PENTIUMPRO:
11709 case PROCESSOR_PENTIUM4:
11710 case PROCESSOR_ATHLON:
11719 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
11720 by DEP_INSN and nothing set by DEP_INSN. */
11723 ix86_flags_dependant (insn, dep_insn, insn_type)
11724 rtx insn, dep_insn;
11725 enum attr_type insn_type;
11729 /* Simplify the test for uninteresting insns. */
11730 if (insn_type != TYPE_SETCC
11731 && insn_type != TYPE_ICMOV
11732 && insn_type != TYPE_FCMOV
11733 && insn_type != TYPE_IBR)
11736 if ((set = single_set (dep_insn)) != 0)
11738 set = SET_DEST (set);
11741 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
11742 && XVECLEN (PATTERN (dep_insn), 0) == 2
11743 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
11744 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
11746 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11747 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11752 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11755 /* This test is true if the dependent insn reads the flags but
11756 not any other potentially set register. */
11757 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11760 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11766 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11767 address with operands set by DEP_INSN. */
/* For LEA, the "address" is the SET_SRC of the pattern itself;
   otherwise the address of the first MEM operand is checked.
   NOTE(review): structural lines elided; code kept verbatim.  */
11770 ix86_agi_dependant (insn, dep_insn, insn_type)
11771 rtx insn, dep_insn;
11772 enum attr_type insn_type;
11776 if (insn_type == TYPE_LEA
11779 addr = PATTERN (insn);
11780 if (GET_CODE (addr) == SET)
11782 else if (GET_CODE (addr) == PARALLEL
11783 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11784 addr = XVECEXP (addr, 0, 0);
11787 addr = SET_SRC (addr);
11792 extract_insn_cached (insn);
11793 for (i = recog_data.n_operands - 1; i >= 0; --i)
11794 if (GET_CODE (recog_data.operand[i]) == MEM)
11796 addr = XEXP (recog_data.operand[i], 0);
/* True when DEP_INSN writes any register used in the address.  */
11803 return modified_in_p (addr, dep_insn);
/* Scheduler hook: adjust COST of the dependency LINK between INSN and
   DEP_INSN according to per-CPU pipeline properties.  Returns the
   adjusted cost.  NOTE(review): structural lines (braces, cost
   assignments, switch header, returns) are elided in this extract;
   code kept verbatim.  */
11807 ix86_adjust_cost (insn, link, dep_insn, cost)
11808 rtx insn, link, dep_insn;
11811 enum attr_type insn_type, dep_insn_type;
11812 enum attr_memory memory, dep_memory;
11814 int dep_insn_code_number;
11816 /* Anti and output dependencies have zero cost on all CPUs. */
11817 if (REG_NOTE_KIND (link) != 0)
11820 dep_insn_code_number = recog_memoized (dep_insn);
11822 /* If we can't recognize the insns, we can't really do anything. */
11823 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
11826 insn_type = get_attr_type (insn);
11827 dep_insn_type = get_attr_type (dep_insn);
11831 case PROCESSOR_PENTIUM:
11832 /* Address Generation Interlock adds a cycle of latency. */
11833 if (ix86_agi_dependant (insn, dep_insn, insn_type))
11836 /* ??? Compares pair with jump/setcc. */
11837 if (ix86_flags_dependant (insn, dep_insn, insn_type))
11840 /* Floating point stores require value to be ready one cycle earlier. */
11841 if (insn_type == TYPE_FMOV
11842 && get_attr_memory (insn) == MEMORY_STORE
11843 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11847 case PROCESSOR_PENTIUMPRO:
11848 memory = get_attr_memory (insn);
11849 dep_memory = get_attr_memory (dep_insn);
11851 /* Since we can't represent delayed latencies of load+operation,
11852 increase the cost here for non-imov insns. */
11853 if (dep_insn_type != TYPE_IMOV
11854 && dep_insn_type != TYPE_FMOV
11855 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
11858 /* INT->FP conversion is expensive. */
11859 if (get_attr_fp_int_src (dep_insn))
11862 /* There is one cycle extra latency between an FP op and a store. */
11863 if (insn_type == TYPE_FMOV
11864 && (set = single_set (dep_insn)) != NULL_RTX
11865 && (set2 = single_set (insn)) != NULL_RTX
11866 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11867 && GET_CODE (SET_DEST (set2)) == MEM)
11870 /* Show ability of reorder buffer to hide latency of load by executing
11871 in parallel with previous instruction in case
11872 previous instruction is not needed to compute the address. */
11873 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11874 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11876 /* Claim moves to take one cycle, as core can issue one load
11877 at time and the next load can start cycle later. */
11878 if (dep_insn_type == TYPE_IMOV
11879 || dep_insn_type == TYPE_FMOV)
/* K6 (presumably -- case label elided in this extract).  */
11887 memory = get_attr_memory (insn);
11888 dep_memory = get_attr_memory (dep_insn);
11889 /* The esp dependency is resolved before the instruction is really
11891 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11892 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP)
11895 /* Since we can't represent delayed latencies of load+operation,
11896 increase the cost here for non-imov insns. */
11897 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11898 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
11900 /* INT->FP conversion is expensive. */
11901 if (get_attr_fp_int_src (dep_insn))
11904 /* Show ability of reorder buffer to hide latency of load by executing
11905 in parallel with previous instruction in case
11906 previous instruction is not needed to compute the address. */
11907 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11908 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11910 /* Claim moves to take one cycle, as core can issue one load
11911 at time and the next load can start cycle later. */
11912 if (dep_insn_type == TYPE_IMOV
11913 || dep_insn_type == TYPE_FMOV)
11922 case PROCESSOR_ATHLON:
11924 memory = get_attr_memory (insn);
11925 dep_memory = get_attr_memory (dep_insn);
11927 /* Show ability of reorder buffer to hide latency of load by executing
11928 in parallel with previous instruction in case
11929 previous instruction is not needed to compute the address. */
11930 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11931 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11933 /* Claim moves to take one cycle, as core can issue one load
11934 at time and the next load can start cycle later. */
11935 if (dep_insn_type == TYPE_IMOV
11936 || dep_insn_type == TYPE_FMOV)
11938 else if (cost >= 3)
11953 struct ppro_sched_data
11956 int issued_this_cycle;
11960 static enum attr_ppro_uops
11961 ix86_safe_ppro_uops (insn)
11964 if (recog_memoized (insn) >= 0)
11965 return get_attr_ppro_uops (insn);
11967 return PPRO_UOPS_MANY;
11971 ix86_dump_ppro_packet (dump)
11974 if (ix86_sched_data.ppro.decode[0])
11976 fprintf (dump, "PPRO packet: %d",
11977 INSN_UID (ix86_sched_data.ppro.decode[0]));
11978 if (ix86_sched_data.ppro.decode[1])
11979 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
11980 if (ix86_sched_data.ppro.decode[2])
11981 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
11982 fputc ('\n', dump);
11986 /* We're beginning a new block. Initialize data structures as necessary. */
11989 ix86_sched_init (dump, sched_verbose, veclen)
11990 FILE *dump ATTRIBUTE_UNUSED;
11991 int sched_verbose ATTRIBUTE_UNUSED;
11992 int veclen ATTRIBUTE_UNUSED;
11994 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
11997 /* Shift INSN to SLOT, and shift everything else down. */
12000 ix86_reorder_insn (insnp, slot)
12007 insnp[0] = insnp[1];
12008 while (++insnp != slot);
12014 ix86_sched_reorder_ppro (ready, e_ready)
12019 enum attr_ppro_uops cur_uops;
12020 int issued_this_cycle;
12024 /* At this point .ppro.decode contains the state of the three
12025 decoders from last "cycle". That is, those insns that were
12026 actually independent. But here we're scheduling for the
12027 decoder, and we may find things that are decodable in the
12030 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
12031 issued_this_cycle = 0;
12034 cur_uops = ix86_safe_ppro_uops (*insnp);
12036 /* If the decoders are empty, and we've a complex insn at the
12037 head of the priority queue, let it issue without complaint. */
12038 if (decode[0] == NULL)
12040 if (cur_uops == PPRO_UOPS_MANY)
12042 decode[0] = *insnp;
12046 /* Otherwise, search for a 2-4 uop unsn to issue. */
12047 while (cur_uops != PPRO_UOPS_FEW)
12049 if (insnp == ready)
12051 cur_uops = ix86_safe_ppro_uops (*--insnp);
12054 /* If so, move it to the head of the line. */
12055 if (cur_uops == PPRO_UOPS_FEW)
12056 ix86_reorder_insn (insnp, e_ready);
12058 /* Issue the head of the queue. */
12059 issued_this_cycle = 1;
12060 decode[0] = *e_ready--;
12063 /* Look for simple insns to fill in the other two slots. */
12064 for (i = 1; i < 3; ++i)
12065 if (decode[i] == NULL)
12067 if (ready > e_ready)
12071 cur_uops = ix86_safe_ppro_uops (*insnp);
12072 while (cur_uops != PPRO_UOPS_ONE)
12074 if (insnp == ready)
12076 cur_uops = ix86_safe_ppro_uops (*--insnp);
12079 /* Found one. Move it to the head of the queue and issue it. */
12080 if (cur_uops == PPRO_UOPS_ONE)
12082 ix86_reorder_insn (insnp, e_ready);
12083 decode[i] = *e_ready--;
12084 issued_this_cycle++;
12088 /* ??? Didn't find one. Ideally, here we would do a lazy split
12089 of 2-uop insns, issue one and queue the other. */
12093 if (issued_this_cycle == 0)
12094 issued_this_cycle = 1;
12095 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
/* We are about to begin issuing insns for this clock cycle.
   Override the default sort algorithm to better slot instructions.  */
12101 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
12102 FILE *dump ATTRIBUTE_UNUSED;
12103 int sched_verbose ATTRIBUTE_UNUSED;
12106 int clock_var ATTRIBUTE_UNUSED;
12108 int n_ready = *n_readyp;
12109 rtx *e_ready = ready + n_ready - 1;
12111 /* Make sure to go ahead and initialize key items in
12112 ix86_sched_data if we are not going to bother trying to
12113 reorder the ready queue. */
12116 ix86_sched_data.ppro.issued_this_cycle = 1;
12125 case PROCESSOR_PENTIUMPRO:
12126 ix86_sched_reorder_ppro (ready, e_ready);
12131 return ix86_issue_rate ();
12134 /* We are about to issue INSN. Return the number of insns left on the
12135 ready queue that can be issued this cycle. */
12138 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
12142 int can_issue_more;
12148 return can_issue_more - 1;
12150 case PROCESSOR_PENTIUMPRO:
12152 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
12154 if (uops == PPRO_UOPS_MANY)
12157 ix86_dump_ppro_packet (dump);
12158 ix86_sched_data.ppro.decode[0] = insn;
12159 ix86_sched_data.ppro.decode[1] = NULL;
12160 ix86_sched_data.ppro.decode[2] = NULL;
12162 ix86_dump_ppro_packet (dump);
12163 ix86_sched_data.ppro.decode[0] = NULL;
12165 else if (uops == PPRO_UOPS_FEW)
12168 ix86_dump_ppro_packet (dump);
12169 ix86_sched_data.ppro.decode[0] = insn;
12170 ix86_sched_data.ppro.decode[1] = NULL;
12171 ix86_sched_data.ppro.decode[2] = NULL;
12175 for (i = 0; i < 3; ++i)
12176 if (ix86_sched_data.ppro.decode[i] == NULL)
12178 ix86_sched_data.ppro.decode[i] = insn;
12186 ix86_dump_ppro_packet (dump);
12187 ix86_sched_data.ppro.decode[0] = NULL;
12188 ix86_sched_data.ppro.decode[1] = NULL;
12189 ix86_sched_data.ppro.decode[2] = NULL;
12193 return --ix86_sched_data.ppro.issued_this_cycle;
12198 ia32_use_dfa_pipeline_interface ()
12200 if (TARGET_PENTIUM || TARGET_ATHLON_K8)
/* How many alternative schedules to try.  This should be as wide as the
   scheduling freedom in the DFA, but no wider.  Making this value too
   large results in extra work for the scheduler.  */
12210 ia32_multipass_dfa_lookahead ()
12212 if (ix86_cpu == PROCESSOR_PENTIUM)
/* Walk through INSNS and look for MEM references whose address is DSTREG or
   SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
   appropriate.  */
12224 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
12226 rtx dstref, srcref, dstreg, srcreg;
12230 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
12232 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
12236 /* Subroutine of above to actually do the updating by recursively walking
12240 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
12242 rtx dstref, srcref, dstreg, srcreg;
12244 enum rtx_code code = GET_CODE (x);
12245 const char *format_ptr = GET_RTX_FORMAT (code);
12248 if (code == MEM && XEXP (x, 0) == dstreg)
12249 MEM_COPY_ATTRIBUTES (x, dstref);
12250 else if (code == MEM && XEXP (x, 0) == srcreg)
12251 MEM_COPY_ATTRIBUTES (x, srcref);
12253 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
12255 if (*format_ptr == 'e')
12256 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
12258 else if (*format_ptr == 'E')
12259 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12260 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
12265 /* Compute the alignment given to a constant that is being placed in memory.
12266 EXP is the constant and ALIGN is the alignment that the object would
12268 The value of this function is used instead of that alignment to align
12272 ix86_constant_alignment (exp, align)
12276 if (TREE_CODE (exp) == REAL_CST)
12278 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12280 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12283 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
12290 /* Compute the alignment for a static variable.
12291 TYPE is the data type, and ALIGN is the alignment that
12292 the object would ordinarily have. The value of this function is used
12293 instead of that alignment to align the object. */
12296 ix86_data_alignment (type, align)
12300 if (AGGREGATE_TYPE_P (type)
12301 && TYPE_SIZE (type)
12302 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12303 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12304 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12307 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12308 to 16byte boundary. */
12311 if (AGGREGATE_TYPE_P (type)
12312 && TYPE_SIZE (type)
12313 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12314 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12315 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12319 if (TREE_CODE (type) == ARRAY_TYPE)
12321 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12323 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12326 else if (TREE_CODE (type) == COMPLEX_TYPE)
12329 if (TYPE_MODE (type) == DCmode && align < 64)
12331 if (TYPE_MODE (type) == XCmode && align < 128)
12334 else if ((TREE_CODE (type) == RECORD_TYPE
12335 || TREE_CODE (type) == UNION_TYPE
12336 || TREE_CODE (type) == QUAL_UNION_TYPE)
12337 && TYPE_FIELDS (type))
12339 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12341 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12344 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12345 || TREE_CODE (type) == INTEGER_TYPE)
12347 if (TYPE_MODE (type) == DFmode && align < 64)
12349 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12356 /* Compute the alignment for a local variable.
12357 TYPE is the data type, and ALIGN is the alignment that
12358 the object would ordinarily have. The value of this macro is used
12359 instead of that alignment to align the object. */
12362 ix86_local_alignment (type, align)
12366 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12367 to 16byte boundary. */
12370 if (AGGREGATE_TYPE_P (type)
12371 && TYPE_SIZE (type)
12372 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12373 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12374 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12377 if (TREE_CODE (type) == ARRAY_TYPE)
12379 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12381 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12384 else if (TREE_CODE (type) == COMPLEX_TYPE)
12386 if (TYPE_MODE (type) == DCmode && align < 64)
12388 if (TYPE_MODE (type) == XCmode && align < 128)
12391 else if ((TREE_CODE (type) == RECORD_TYPE
12392 || TREE_CODE (type) == UNION_TYPE
12393 || TREE_CODE (type) == QUAL_UNION_TYPE)
12394 && TYPE_FIELDS (type))
12396 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12398 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12401 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12402 || TREE_CODE (type) == INTEGER_TYPE)
12405 if (TYPE_MODE (type) == DFmode && align < 64)
12407 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12413 /* Emit RTL insns to initialize the variable parts of a trampoline.
12414 FNADDR is an RTX for the address of the function's pure code.
12415 CXT is an RTX for the static chain value for the function. */
12417 x86_initialize_trampoline (tramp, fnaddr, cxt)
12418 rtx tramp, fnaddr, cxt;
12422 /* Compute offset from the end of the jmp to the target function. */
12423 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12424 plus_constant (tramp, 10),
12425 NULL_RTX, 1, OPTAB_DIRECT);
12426 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12427 gen_int_mode (0xb9, QImode));
12428 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12429 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12430 gen_int_mode (0xe9, QImode));
12431 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12436 /* Try to load address using shorter movl instead of movabs.
12437 We may want to support movq for kernel mode, but kernel does not use
12438 trampolines at the moment. */
12439 if (x86_64_zero_extended_value (fnaddr))
12441 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12442 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12443 gen_int_mode (0xbb41, HImode));
12444 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12445 gen_lowpart (SImode, fnaddr));
12450 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12451 gen_int_mode (0xbb49, HImode));
12452 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12456 /* Load static chain using movabs to r10. */
12457 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12458 gen_int_mode (0xba49, HImode));
12459 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12462 /* Jump to the r11 */
12463 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12464 gen_int_mode (0xff49, HImode));
12465 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12466 gen_int_mode (0xe3, QImode));
12468 if (offset > TRAMPOLINE_SIZE)
12472 #ifdef TRANSFER_FROM_TRAMPOLINE
12473 emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
12474 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
/* Register builtin NAME of type TYPE, expanded via CODE, but only when
   the target_flags bits in MASK are enabled.  */
#define def_builtin(MASK, NAME, TYPE, CODE)			\
do {								\
  if ((MASK) & target_flags)					\
    builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,	\
		      NULL, NULL_TREE);				\
} while (0)
12485 struct builtin_description
12487 const unsigned int mask;
12488 const enum insn_code icode;
12489 const char *const name;
12490 const enum ix86_builtins code;
12491 const enum rtx_code comparison;
12492 const unsigned int flag;
/* Used for builtins that are enabled both by -msse and -msse2.  */
#define MASK_SSE1 (MASK_SSE | MASK_SSE2)
12498 static const struct builtin_description bdesc_comi[] =
12500 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12501 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12502 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12503 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12504 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12505 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12506 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12507 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12508 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12509 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12510 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12511 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12512 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12513 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12514 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12515 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12516 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12517 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12518 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12519 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12520 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12521 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12522 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12523 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12526 static const struct builtin_description bdesc_2arg[] =
12529 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12530 { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12531 { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12532 { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12533 { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12534 { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12535 { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12536 { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12538 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12539 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12540 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12541 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12542 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12543 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12544 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12545 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12546 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12547 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12548 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12549 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12550 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12551 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12552 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12553 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12554 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12555 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12556 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12557 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12559 { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12560 { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12561 { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12562 { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12564 { MASK_SSE1, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12565 { MASK_SSE1, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12566 { MASK_SSE1, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12567 { MASK_SSE1, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12569 { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12570 { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12571 { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12572 { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12573 { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12576 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12577 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12578 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12579 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12580 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12581 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12583 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12584 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12585 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12586 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12587 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12588 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12589 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12590 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12592 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12593 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12594 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12596 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12597 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12598 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12599 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12601 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12602 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12604 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12605 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12606 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12607 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12608 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12609 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12611 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12612 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12613 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12614 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12616 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12617 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12618 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12619 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12620 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12621 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12624 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12625 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12626 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12628 { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12629 { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12631 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12632 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12633 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12634 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12635 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12636 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12638 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12639 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12640 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12641 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12642 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12643 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12645 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12646 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12647 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12648 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12650 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12651 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12654 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12655 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12656 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12657 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12658 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12659 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12660 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12661 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12663 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12664 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12665 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12666 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12667 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12668 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12669 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12670 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12671 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12672 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12673 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12674 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12675 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12676 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12677 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12678 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12679 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12680 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12681 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12682 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12684 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12685 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12686 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12687 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12689 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12690 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12691 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12692 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12694 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12695 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12696 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12699 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12700 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12701 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12702 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12703 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12704 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12705 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12706 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12708 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12709 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12710 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12711 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12712 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12713 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12714 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12715 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12717 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12718 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12719 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
12720 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
12722 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12723 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12724 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12725 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
12727 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12728 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12730 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12731 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12732 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12733 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12734 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12735 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12737 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12738 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12739 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12740 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12742 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12743 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12744 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
12745 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
12746 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12747 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12748 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12749 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
12751 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12752 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12753 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12755 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12756 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12758 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12759 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12760 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12761 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12762 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12763 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12765 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12766 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12767 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12768 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12769 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12770 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12772 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12773 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12774 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12775 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12777 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12779 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12780 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12781 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
12784 static const struct builtin_description bdesc_1arg[] =
12786 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12787 { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12789 { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12790 { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12791 { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12793 { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12794 { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12795 { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12796 { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12798 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12799 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12800 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
12801 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
12803 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12805 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12806 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
12808 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12809 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12810 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12811 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12812 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
12814 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
12816 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12817 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12819 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12820 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
12821 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
12823 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 }
12827 ix86_init_builtins ()
12830 ix86_init_mmx_sse_builtins ();
12833 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
12834 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
12837 ix86_init_mmx_sse_builtins ()
12839 const struct builtin_description * d;
12842 tree pchar_type_node = build_pointer_type (char_type_node);
12843 tree pcchar_type_node = build_pointer_type (
12844 build_type_variant (char_type_node, 1, 0));
12845 tree pfloat_type_node = build_pointer_type (float_type_node);
12846 tree pcfloat_type_node = build_pointer_type (
12847 build_type_variant (float_type_node, 1, 0));
12848 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
12849 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
12850 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
12853 tree int_ftype_v4sf_v4sf
12854 = build_function_type_list (integer_type_node,
12855 V4SF_type_node, V4SF_type_node, NULL_TREE);
12856 tree v4si_ftype_v4sf_v4sf
12857 = build_function_type_list (V4SI_type_node,
12858 V4SF_type_node, V4SF_type_node, NULL_TREE);
12859 /* MMX/SSE/integer conversions. */
12860 tree int_ftype_v4sf
12861 = build_function_type_list (integer_type_node,
12862 V4SF_type_node, NULL_TREE);
12863 tree int_ftype_v8qi
12864 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
12865 tree v4sf_ftype_v4sf_int
12866 = build_function_type_list (V4SF_type_node,
12867 V4SF_type_node, integer_type_node, NULL_TREE);
12868 tree v4sf_ftype_v4sf_v2si
12869 = build_function_type_list (V4SF_type_node,
12870 V4SF_type_node, V2SI_type_node, NULL_TREE);
12871 tree int_ftype_v4hi_int
12872 = build_function_type_list (integer_type_node,
12873 V4HI_type_node, integer_type_node, NULL_TREE);
12874 tree v4hi_ftype_v4hi_int_int
12875 = build_function_type_list (V4HI_type_node, V4HI_type_node,
12876 integer_type_node, integer_type_node,
12878 /* Miscellaneous. */
12879 tree v8qi_ftype_v4hi_v4hi
12880 = build_function_type_list (V8QI_type_node,
12881 V4HI_type_node, V4HI_type_node, NULL_TREE);
12882 tree v4hi_ftype_v2si_v2si
12883 = build_function_type_list (V4HI_type_node,
12884 V2SI_type_node, V2SI_type_node, NULL_TREE);
12885 tree v4sf_ftype_v4sf_v4sf_int
12886 = build_function_type_list (V4SF_type_node,
12887 V4SF_type_node, V4SF_type_node,
12888 integer_type_node, NULL_TREE);
12889 tree v2si_ftype_v4hi_v4hi
12890 = build_function_type_list (V2SI_type_node,
12891 V4HI_type_node, V4HI_type_node, NULL_TREE);
12892 tree v4hi_ftype_v4hi_int
12893 = build_function_type_list (V4HI_type_node,
12894 V4HI_type_node, integer_type_node, NULL_TREE);
12895 tree v4hi_ftype_v4hi_di
12896 = build_function_type_list (V4HI_type_node,
12897 V4HI_type_node, long_long_unsigned_type_node,
12899 tree v2si_ftype_v2si_di
12900 = build_function_type_list (V2SI_type_node,
12901 V2SI_type_node, long_long_unsigned_type_node,
12903 tree void_ftype_void
12904 = build_function_type (void_type_node, void_list_node);
12905 tree void_ftype_unsigned
12906 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
12907 tree unsigned_ftype_void
12908 = build_function_type (unsigned_type_node, void_list_node);
12910 = build_function_type (long_long_unsigned_type_node, void_list_node);
12911 tree v4sf_ftype_void
12912 = build_function_type (V4SF_type_node, void_list_node);
12913 tree v2si_ftype_v4sf
12914 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
12915 /* Loads/stores. */
12916 tree void_ftype_v8qi_v8qi_pchar
12917 = build_function_type_list (void_type_node,
12918 V8QI_type_node, V8QI_type_node,
12919 pchar_type_node, NULL_TREE);
12920 tree v4sf_ftype_pcfloat
12921 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
12922 /* @@@ the type is bogus */
12923 tree v4sf_ftype_v4sf_pv2si
12924 = build_function_type_list (V4SF_type_node,
12925 V4SF_type_node, pv2si_type_node, NULL_TREE);
12926 tree void_ftype_pv2si_v4sf
12927 = build_function_type_list (void_type_node,
12928 pv2si_type_node, V4SF_type_node, NULL_TREE);
12929 tree void_ftype_pfloat_v4sf
12930 = build_function_type_list (void_type_node,
12931 pfloat_type_node, V4SF_type_node, NULL_TREE);
12932 tree void_ftype_pdi_di
12933 = build_function_type_list (void_type_node,
12934 pdi_type_node, long_long_unsigned_type_node,
12936 tree void_ftype_pv2di_v2di
12937 = build_function_type_list (void_type_node,
12938 pv2di_type_node, V2DI_type_node, NULL_TREE);
12939 /* Normal vector unops. */
12940 tree v4sf_ftype_v4sf
12941 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
12943 /* Normal vector binops. */
12944 tree v4sf_ftype_v4sf_v4sf
12945 = build_function_type_list (V4SF_type_node,
12946 V4SF_type_node, V4SF_type_node, NULL_TREE);
12947 tree v8qi_ftype_v8qi_v8qi
12948 = build_function_type_list (V8QI_type_node,
12949 V8QI_type_node, V8QI_type_node, NULL_TREE);
12950 tree v4hi_ftype_v4hi_v4hi
12951 = build_function_type_list (V4HI_type_node,
12952 V4HI_type_node, V4HI_type_node, NULL_TREE);
12953 tree v2si_ftype_v2si_v2si
12954 = build_function_type_list (V2SI_type_node,
12955 V2SI_type_node, V2SI_type_node, NULL_TREE);
12956 tree di_ftype_di_di
12957 = build_function_type_list (long_long_unsigned_type_node,
12958 long_long_unsigned_type_node,
12959 long_long_unsigned_type_node, NULL_TREE);
12961 tree v2si_ftype_v2sf
12962 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
12963 tree v2sf_ftype_v2si
12964 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
12965 tree v2si_ftype_v2si
12966 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
12967 tree v2sf_ftype_v2sf
12968 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
12969 tree v2sf_ftype_v2sf_v2sf
12970 = build_function_type_list (V2SF_type_node,
12971 V2SF_type_node, V2SF_type_node, NULL_TREE);
12972 tree v2si_ftype_v2sf_v2sf
12973 = build_function_type_list (V2SI_type_node,
12974 V2SF_type_node, V2SF_type_node, NULL_TREE);
12975 tree pint_type_node = build_pointer_type (integer_type_node);
12976 tree pcint_type_node = build_pointer_type (
12977 build_type_variant (integer_type_node, 1, 0));
12978 tree pdouble_type_node = build_pointer_type (double_type_node);
12979 tree pcdouble_type_node = build_pointer_type (
12980 build_type_variant (double_type_node, 1, 0));
12981 tree int_ftype_v2df_v2df
12982 = build_function_type_list (integer_type_node,
12983 V2DF_type_node, V2DF_type_node, NULL_TREE);
12986 = build_function_type (intTI_type_node, void_list_node);
12987 tree v2di_ftype_void
12988 = build_function_type (V2DI_type_node, void_list_node);
12989 tree ti_ftype_ti_ti
12990 = build_function_type_list (intTI_type_node,
12991 intTI_type_node, intTI_type_node, NULL_TREE);
12992 tree void_ftype_pcvoid
12993 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
12995 = build_function_type_list (V2DI_type_node,
12996 long_long_unsigned_type_node, NULL_TREE);
12998 = build_function_type_list (long_long_unsigned_type_node,
12999 V2DI_type_node, NULL_TREE);
13000 tree v4sf_ftype_v4si
13001 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
13002 tree v4si_ftype_v4sf
13003 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
13004 tree v2df_ftype_v4si
13005 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
13006 tree v4si_ftype_v2df
13007 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
13008 tree v2si_ftype_v2df
13009 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
13010 tree v4sf_ftype_v2df
13011 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
13012 tree v2df_ftype_v2si
13013 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
13014 tree v2df_ftype_v4sf
13015 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
13016 tree int_ftype_v2df
13017 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
13018 tree v2df_ftype_v2df_int
13019 = build_function_type_list (V2DF_type_node,
13020 V2DF_type_node, integer_type_node, NULL_TREE);
13021 tree v4sf_ftype_v4sf_v2df
13022 = build_function_type_list (V4SF_type_node,
13023 V4SF_type_node, V2DF_type_node, NULL_TREE);
13024 tree v2df_ftype_v2df_v4sf
13025 = build_function_type_list (V2DF_type_node,
13026 V2DF_type_node, V4SF_type_node, NULL_TREE);
13027 tree v2df_ftype_v2df_v2df_int
13028 = build_function_type_list (V2DF_type_node,
13029 V2DF_type_node, V2DF_type_node,
13032 tree v2df_ftype_v2df_pv2si
13033 = build_function_type_list (V2DF_type_node,
13034 V2DF_type_node, pv2si_type_node, NULL_TREE);
13035 tree void_ftype_pv2si_v2df
13036 = build_function_type_list (void_type_node,
13037 pv2si_type_node, V2DF_type_node, NULL_TREE);
13038 tree void_ftype_pdouble_v2df
13039 = build_function_type_list (void_type_node,
13040 pdouble_type_node, V2DF_type_node, NULL_TREE);
13041 tree void_ftype_pint_int
13042 = build_function_type_list (void_type_node,
13043 pint_type_node, integer_type_node, NULL_TREE);
13044 tree void_ftype_v16qi_v16qi_pchar
13045 = build_function_type_list (void_type_node,
13046 V16QI_type_node, V16QI_type_node,
13047 pchar_type_node, NULL_TREE);
13048 tree v2df_ftype_pcdouble
13049 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
13050 tree v2df_ftype_v2df_v2df
13051 = build_function_type_list (V2DF_type_node,
13052 V2DF_type_node, V2DF_type_node, NULL_TREE);
13053 tree v16qi_ftype_v16qi_v16qi
13054 = build_function_type_list (V16QI_type_node,
13055 V16QI_type_node, V16QI_type_node, NULL_TREE);
13056 tree v8hi_ftype_v8hi_v8hi
13057 = build_function_type_list (V8HI_type_node,
13058 V8HI_type_node, V8HI_type_node, NULL_TREE);
13059 tree v4si_ftype_v4si_v4si
13060 = build_function_type_list (V4SI_type_node,
13061 V4SI_type_node, V4SI_type_node, NULL_TREE);
13062 tree v2di_ftype_v2di_v2di
13063 = build_function_type_list (V2DI_type_node,
13064 V2DI_type_node, V2DI_type_node, NULL_TREE);
13065 tree v2di_ftype_v2df_v2df
13066 = build_function_type_list (V2DI_type_node,
13067 V2DF_type_node, V2DF_type_node, NULL_TREE);
13068 tree v2df_ftype_v2df
13069 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13070 tree v2df_ftype_double
13071 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
13072 tree v2df_ftype_double_double
13073 = build_function_type_list (V2DF_type_node,
13074 double_type_node, double_type_node, NULL_TREE);
13075 tree int_ftype_v8hi_int
13076 = build_function_type_list (integer_type_node,
13077 V8HI_type_node, integer_type_node, NULL_TREE);
13078 tree v8hi_ftype_v8hi_int_int
13079 = build_function_type_list (V8HI_type_node,
13080 V8HI_type_node, integer_type_node,
13081 integer_type_node, NULL_TREE);
13082 tree v2di_ftype_v2di_int
13083 = build_function_type_list (V2DI_type_node,
13084 V2DI_type_node, integer_type_node, NULL_TREE);
13085 tree v4si_ftype_v4si_int
13086 = build_function_type_list (V4SI_type_node,
13087 V4SI_type_node, integer_type_node, NULL_TREE);
13088 tree v8hi_ftype_v8hi_int
13089 = build_function_type_list (V8HI_type_node,
13090 V8HI_type_node, integer_type_node, NULL_TREE);
13091 tree v8hi_ftype_v8hi_v2di
13092 = build_function_type_list (V8HI_type_node,
13093 V8HI_type_node, V2DI_type_node, NULL_TREE);
13094 tree v4si_ftype_v4si_v2di
13095 = build_function_type_list (V4SI_type_node,
13096 V4SI_type_node, V2DI_type_node, NULL_TREE);
13097 tree v4si_ftype_v8hi_v8hi
13098 = build_function_type_list (V4SI_type_node,
13099 V8HI_type_node, V8HI_type_node, NULL_TREE);
13100 tree di_ftype_v8qi_v8qi
13101 = build_function_type_list (long_long_unsigned_type_node,
13102 V8QI_type_node, V8QI_type_node, NULL_TREE);
13103 tree v2di_ftype_v16qi_v16qi
13104 = build_function_type_list (V2DI_type_node,
13105 V16QI_type_node, V16QI_type_node, NULL_TREE);
13106 tree int_ftype_v16qi
13107 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
13108 tree v16qi_ftype_pcchar
13109 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
13110 tree void_ftype_pchar_v16qi
13111 = build_function_type_list (void_type_node,
13112 pchar_type_node, V16QI_type_node, NULL_TREE);
13113 tree v4si_ftype_pcint
13114 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
13115 tree void_ftype_pcint_v4si
13116 = build_function_type_list (void_type_node,
13117 pcint_type_node, V4SI_type_node, NULL_TREE);
13118 tree v2di_ftype_v2di
13119 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
13121 /* Add all builtins that are more or less simple operations on two
13123 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13125 /* Use one of the operands; the target can have a different mode for
13126 mask-generating compares. */
13127 enum machine_mode mode;
13132 mode = insn_data[d->icode].operand[1].mode;
13137 type = v16qi_ftype_v16qi_v16qi;
13140 type = v8hi_ftype_v8hi_v8hi;
13143 type = v4si_ftype_v4si_v4si;
13146 type = v2di_ftype_v2di_v2di;
13149 type = v2df_ftype_v2df_v2df;
13152 type = ti_ftype_ti_ti;
13155 type = v4sf_ftype_v4sf_v4sf;
13158 type = v8qi_ftype_v8qi_v8qi;
13161 type = v4hi_ftype_v4hi_v4hi;
13164 type = v2si_ftype_v2si_v2si;
13167 type = di_ftype_di_di;
13174 /* Override for comparisons. */
13175 if (d->icode == CODE_FOR_maskcmpv4sf3
13176 || d->icode == CODE_FOR_maskncmpv4sf3
13177 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13178 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
13179 type = v4si_ftype_v4sf_v4sf;
13181 if (d->icode == CODE_FOR_maskcmpv2df3
13182 || d->icode == CODE_FOR_maskncmpv2df3
13183 || d->icode == CODE_FOR_vmmaskcmpv2df3
13184 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13185 type = v2di_ftype_v2df_v2df;
13187 def_builtin (d->mask, d->name, type, d->code);
13190 /* Add the remaining MMX insns with somewhat more complicated types. */
13191 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13192 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13193 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13194 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13195 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13197 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13198 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13199 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13201 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13202 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13204 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13205 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
13207 /* comi/ucomi insns. */
13208 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13209 if (d->mask == MASK_SSE2)
13210 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13212 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
13214 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13215 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13216 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13218 def_builtin (MASK_SSE1, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13219 def_builtin (MASK_SSE1, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13220 def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13221 def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13222 def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13223 def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13224 def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13225 def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13227 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13228 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13230 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13232 def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13233 def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13234 def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13235 def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13236 def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13237 def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13239 def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13240 def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13241 def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13242 def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13244 def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13245 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13246 def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13247 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13249 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13251 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13253 def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13254 def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13255 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13256 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13257 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13258 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13260 def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13262 /* Original 3DNow! */
13263 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13264 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13265 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13266 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13267 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13268 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13269 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13270 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13271 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13272 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13273 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13274 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13275 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13276 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13277 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13278 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13279 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13280 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13281 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13282 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13284 /* 3DNow! extension as used in the Athlon CPU. */
13285 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13286 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13287 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13288 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13289 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13290 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13292 def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13295 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13296 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13298 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13299 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13300 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13302 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13303 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13304 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
13305 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13306 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13307 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13309 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13310 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13311 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13312 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13314 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13315 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13316 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13317 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13318 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13320 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13321 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13322 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13323 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13325 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13326 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13328 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13330 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13331 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13333 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13334 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13335 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13336 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13337 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13339 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13341 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13342 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13344 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13345 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13346 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13348 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13349 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13350 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13352 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13353 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13354 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13355 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13356 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13357 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13358 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13360 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13361 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13362 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13364 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13365 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13366 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13367 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13368 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13369 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13370 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13372 def_builtin (MASK_SSE1, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13374 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13375 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13376 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13378 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13379 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13380 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13382 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13383 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13385 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13386 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13387 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13388 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13390 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13391 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13392 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13393 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13395 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13396 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13398 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13401 /* Errors in the source file can cause expand_expr to return const0_rtx
13402 where we expect a vector. To avoid crashing, use one of the vector
13403 clear instructions. */
/* NOTE(review): this extraction is missing intervening source lines (the
   return type, the declaration of X, the body of the early return and the
   closing brace).  Only comments are added here; code bytes untouched.  */
13405 safe_vector_operand (x, mode)
13407 enum machine_mode mode;
/* A genuine vector operand presumably passes straight through (elided
   "return x;"); const0_rtx is replaced by a freshly cleared register.  */
13409 if (x != const0_rtx)
13411 x = gen_reg_rtx (mode);
/* MMX/3DNow! modes: clear via the DImode view of the new register.  */
13413 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13414 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13415 : gen_rtx_SUBREG (DImode, x, 0)));
/* SSE modes: clear through a V4SFmode xorps on the register (or a
   V4SFmode subreg of it when MODE differs).  */
13417 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13418 : gen_rtx_SUBREG (V4SFmode, x, 0),
13419 CONST0_RTX (V4SFmode)));
13423 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* Expand a two-argument builtin through insn pattern ICODE, taking the
   two arguments from ARGLIST and putting the result in TARGET (or a new
   pseudo if TARGET is unsuitable).  Returns the result rtx.
   NOTE(review): extraction is elided — the return type, some braces,
   the "if (target == 0" header line and the final emit/return are
   missing from view.  Comments only added; code bytes untouched.  */
13426 ix86_expand_binop_builtin (icode, arglist, target)
13427 enum insn_code icode;
13432 tree arg0 = TREE_VALUE (arglist);
13433 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13434 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13435 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13436 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13437 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13438 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Guard against const0_rtx coming back from erroneous source — see
   safe_vector_operand.  */
13440 if (VECTOR_MODE_P (mode0))
13441 op0 = safe_vector_operand (op0, mode0);
13442 if (VECTOR_MODE_P (mode1))
13443 op1 = safe_vector_operand (op1, mode1);
/* Make sure TARGET is a register of the result mode accepted by the
   pattern's operand-0 predicate.  */
13446 || GET_MODE (target) != tmode
13447 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13448 target = gen_reg_rtx (tmode);
/* Shift-count case: widen an SImode count to the TImode the SSE2 shift
   patterns expect, by loading it into a V4SImode register first.  */
13450 if (GET_MODE (op1) == SImode && mode1 == TImode)
13452 rtx x = gen_reg_rtx (V4SImode);
13453 emit_insn (gen_sse2_loadd (x, op1));
13454 op1 = gen_lowpart (TImode, x);
13457 /* In case the insn wants input operands in modes different from
13458 the result, abort. */
13459 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
13462 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13463 op0 = copy_to_mode_reg (mode0, op0);
13464 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13465 op1 = copy_to_mode_reg (mode1, op1);
13467 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13468 yet one of the two must not be a memory. This is normally enforced
13469 by expanders, but we didn't bother to create one here. */
13470 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13471 op0 = copy_to_mode_reg (mode0, op0);
13473 pat = GEN_FCN (icode) (target, op0, op1);
13480 /* Subroutine of ix86_expand_builtin to take care of stores. */
/* Expand a store-style builtin: arg0 is the destination address, arg1
   the value.  Emits pattern ICODE with a MEM destination and returns
   (presumably, in the elided tail) const0_rtx / 0.
   NOTE(review): return type, braces and the final emit/return lines are
   elided from this extraction.  */
13483 ix86_expand_store_builtin (icode, arglist)
13484 enum insn_code icode;
13488 tree arg0 = TREE_VALUE (arglist);
13489 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13490 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13491 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13492 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13493 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13495 if (VECTOR_MODE_P (mode1))
13496 op1 = safe_vector_operand (op1, mode1);
/* Force the pointer into a Pmode register and wrap it in a MEM of the
   destination mode; force the stored value into a register too.  */
13498 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13499 op1 = copy_to_mode_reg (mode1, op1);
13501 pat = GEN_FCN (icode) (op0, op1);
13507 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
/* Expand a one-argument builtin through pattern ICODE.  When DO_LOAD is
   nonzero the argument is a pointer and is dereferenced through a MEM;
   otherwise it is used directly (the if/else structure around the two
   branches is elided in this extraction).  Result goes to TARGET or a
   fresh pseudo.  Comments only added; code bytes untouched.  */
13510 ix86_expand_unop_builtin (icode, arglist, target, do_load)
13511 enum insn_code icode;
13517 tree arg0 = TREE_VALUE (arglist);
13518 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13519 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13520 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13523 || GET_MODE (target) != tmode
13524 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13525 target = gen_reg_rtx (tmode);
/* do_load branch: treat op0 as an address.  */
13527 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
/* non-load branch: sanitize a possible const0_rtx vector and satisfy
   the input predicate.  */
13530 if (VECTOR_MODE_P (mode0))
13531 op0 = safe_vector_operand (op0, mode0);
13533 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13534 op0 = copy_to_mode_reg (mode0, op0);
13537 pat = GEN_FCN (icode) (target, op0);
13544 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13545 sqrtss, rsqrtss, rcpss. */
/* These scalar insns take the same value twice (source and pass-through
   of the upper elements), hence the op1 copy of op0 — the assignment
   "op1 = op0;" itself is elided from this extraction.  */
13548 ix86_expand_unop1_builtin (icode, arglist, target)
13549 enum insn_code icode;
13554 tree arg0 = TREE_VALUE (arglist);
13555 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13556 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13557 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13560 || GET_MODE (target) != tmode
13561 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13562 target = gen_reg_rtx (tmode);
13564 if (VECTOR_MODE_P (mode0))
13565 op0 = safe_vector_operand (op0, mode0);
13567 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13568 op0 = copy_to_mode_reg (mode0, op0);
/* op1 duplicates op0 here (assignment elided); both operands share
   mode0 by construction of the vm* patterns.  */
13571 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13572 op1 = copy_to_mode_reg (mode0, op1);
13574 pat = GEN_FCN (icode) (target, op0, op1);
13581 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* Expand an SSE mask-producing compare (cmpps/cmppd family) described
   by D: build the comparison rtx and emit D->icode with it as the
   4th operand.  NOTE(review): the operand-swap branch and several
   surrounding lines are elided in this extraction.  */
13584 ix86_expand_sse_compare (d, arglist, target)
13585 const struct builtin_description *d;
13590 tree arg0 = TREE_VALUE (arglist);
13591 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13592 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13593 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13595 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13596 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13597 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13598 enum rtx_code comparison = d->comparison;
13600 if (VECTOR_MODE_P (mode0))
13601 op0 = safe_vector_operand (op0, mode0);
13602 if (VECTOR_MODE_P (mode1))
13603 op1 = safe_vector_operand (op1, mode1);
13605 /* Swap operands if we have a comparison that isn't available in
/* (rest of the swap comment and the condition are elided; tmp holds a
   copy of op1 for the exchange).  */
13609 rtx tmp = gen_reg_rtx (mode1);
13610 emit_move_insn (tmp, op1);
13616 || GET_MODE (target) != tmode
13617 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13618 target = gen_reg_rtx (tmode);
13620 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13621 op0 = copy_to_mode_reg (mode0, op0);
13622 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13623 op1 = copy_to_mode_reg (mode1, op1);
/* op2 encodes the comparison code the pattern will match on.  */
13625 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13626 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13633 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* Expand a comiss/comisd-style builtin described by D: emit the compare
   insn, then set the QImode low part of a zero-initialized SImode
   pseudo from the resulting flags and return the SImode register.
   NOTE(review): the swap branch, pattern validity check and setcc
   operand lines are elided in this extraction.  */
13636 ix86_expand_sse_comi (d, arglist, target)
13637 const struct builtin_description *d;
13642 tree arg0 = TREE_VALUE (arglist);
13643 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13644 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13645 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13647 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13648 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13649 enum rtx_code comparison = d->comparison;
13651 if (VECTOR_MODE_P (mode0))
13652 op0 = safe_vector_operand (op0, mode0);
13653 if (VECTOR_MODE_P (mode1))
13654 op1 = safe_vector_operand (op1, mode1);
13656 /* Swap operands if we have a comparison that isn't available in
/* Result register: SImode zeroed first so the STRICT_LOW_PART QImode
   setcc below yields a clean 0/1 int.  */
13665 target = gen_reg_rtx (SImode);
13666 emit_move_insn (target, const0_rtx);
13667 target = gen_rtx_SUBREG (QImode, target, 0);
13669 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13670 op0 = copy_to_mode_reg (mode0, op0);
13671 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13672 op1 = copy_to_mode_reg (mode1, op1);
13674 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13675 pat = GEN_FCN (d->icode) (op0, op1);
/* Materialize the flags comparison into the QImode low part.  */
13679 emit_insn (gen_rtx_SET (VOIDmode,
13680 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13681 gen_rtx_fmt_ee (comparison, QImode,
/* Return the underlying SImode pseudo, not the QImode subreg.  */
13685 return SUBREG_REG (target);
13688 /* Expand an expression EXP that calls a built-in function,
13689 with result going to TARGET if that's convenient
13690 (and in mode MODE if that's convenient).
13691 SUBTARGET may be used as the target for computing one of EXP's operands.
13692 IGNORE is nonzero if the value is to be ignored. */
/* Main dispatcher for the IA-32 MMX/SSE/SSE2/3DNow! builtins.  Special
   cases are handled inline in the big switch; everything else falls
   through to table-driven expansion via bdesc_2arg / bdesc_1arg /
   bdesc_comi at the bottom.
   NOTE(review): this extraction is elided — closing braces, "break;"
   statements, error-path lines and several "if (target == 0" headers
   are missing throughout.  Only comments were added; all code bytes
   are untouched.  */
13695 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
13698 rtx subtarget ATTRIBUTE_UNUSED;
13699 enum machine_mode mode ATTRIBUTE_UNUSED;
13700 int ignore ATTRIBUTE_UNUSED;
13702 const struct builtin_description *d;
13704 enum insn_code icode;
13705 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13706 tree arglist = TREE_OPERAND (exp, 1);
13707 tree arg0, arg1, arg2;
13708 rtx op0, op1, op2, pat;
13709 enum machine_mode tmode, mode0, mode1, mode2;
13710 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
/* No-operand state-management insns.  */
13714 case IX86_BUILTIN_EMMS:
13715 emit_insn (gen_emms ());
13718 case IX86_BUILTIN_SFENCE:
13719 emit_insn (gen_sfence ());
/* pextrw: second operand must be an immediate selector.  */
13722 case IX86_BUILTIN_PEXTRW:
13723 case IX86_BUILTIN_PEXTRW128:
13724 icode = (fcode == IX86_BUILTIN_PEXTRW
13725 ? CODE_FOR_mmx_pextrw
13726 : CODE_FOR_sse2_pextrw);
13727 arg0 = TREE_VALUE (arglist);
13728 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13729 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13730 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13731 tmode = insn_data[icode].operand[0].mode;
13732 mode0 = insn_data[icode].operand[1].mode;
13733 mode1 = insn_data[icode].operand[2].mode;
13735 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13736 op0 = copy_to_mode_reg (mode0, op0);
13737 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13739 /* @@@ better error message */
13740 error ("selector must be an immediate");
13741 return gen_reg_rtx (tmode);
13744 || GET_MODE (target) != tmode
13745 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13746 target = gen_reg_rtx (tmode);
13747 pat = GEN_FCN (icode) (target, op0, op1);
/* pinsrw: three operands, last must be an immediate selector.  */
13753 case IX86_BUILTIN_PINSRW:
13754 case IX86_BUILTIN_PINSRW128:
13755 icode = (fcode == IX86_BUILTIN_PINSRW
13756 ? CODE_FOR_mmx_pinsrw
13757 : CODE_FOR_sse2_pinsrw);
13758 arg0 = TREE_VALUE (arglist);
13759 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13760 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13761 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13762 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13763 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13764 tmode = insn_data[icode].operand[0].mode;
13765 mode0 = insn_data[icode].operand[1].mode;
13766 mode1 = insn_data[icode].operand[2].mode;
13767 mode2 = insn_data[icode].operand[3].mode;
13769 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13770 op0 = copy_to_mode_reg (mode0, op0);
13771 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13772 op1 = copy_to_mode_reg (mode1, op1);
13773 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13775 /* @@@ better error message */
13776 error ("selector must be an immediate");
13780 || GET_MODE (target) != tmode
13781 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13782 target = gen_reg_rtx (tmode);
13783 pat = GEN_FCN (icode) (target, op0, op1, op2);
/* maskmovq/maskmovdqu: stores through an implicit pointer operand;
   note the argument order differs from the insn operand order.  */
13789 case IX86_BUILTIN_MASKMOVQ:
13790 case IX86_BUILTIN_MASKMOVDQU:
13791 icode = (fcode == IX86_BUILTIN_MASKMOVQ
13792 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
13793 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
13794 : CODE_FOR_sse2_maskmovdqu));
13795 /* Note the arg order is different from the operand order. */
13796 arg1 = TREE_VALUE (arglist);
13797 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
13798 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13799 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13800 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13801 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13802 mode0 = insn_data[icode].operand[0].mode;
13803 mode1 = insn_data[icode].operand[1].mode;
13804 mode2 = insn_data[icode].operand[2].mode;
13806 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13807 op0 = copy_to_mode_reg (mode0, op0);
13808 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13809 op1 = copy_to_mode_reg (mode1, op1);
13810 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
13811 op2 = copy_to_mode_reg (mode2, op2);
13812 pat = GEN_FCN (icode) (op0, op1, op2);
/* Scalar SSE unops with duplicated source operand.  */
13818 case IX86_BUILTIN_SQRTSS:
13819 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
13820 case IX86_BUILTIN_RSQRTSS:
13821 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
13822 case IX86_BUILTIN_RCPSS:
13823 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
/* SSE load/store builtins delegate to the generic helpers.  */
13825 case IX86_BUILTIN_LOADAPS:
13826 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
13828 case IX86_BUILTIN_LOADUPS:
13829 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
13831 case IX86_BUILTIN_STOREAPS:
13832 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
13834 case IX86_BUILTIN_STOREUPS:
13835 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
13837 case IX86_BUILTIN_LOADSS:
13838 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
13840 case IX86_BUILTIN_STORESS:
13841 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
/* movhps/movlps/movhpd/movlpd loads: vector reg + memory half.  */
13843 case IX86_BUILTIN_LOADHPS:
13844 case IX86_BUILTIN_LOADLPS:
13845 case IX86_BUILTIN_LOADHPD:
13846 case IX86_BUILTIN_LOADLPD:
13847 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
13848 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
13849 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
13850 : CODE_FOR_sse2_movlpd);
13851 arg0 = TREE_VALUE (arglist);
13852 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13853 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13854 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13855 tmode = insn_data[icode].operand[0].mode;
13856 mode0 = insn_data[icode].operand[1].mode;
13857 mode1 = insn_data[icode].operand[2].mode;
13859 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13860 op0 = copy_to_mode_reg (mode0, op0);
13861 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
13863 || GET_MODE (target) != tmode
13864 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13865 target = gen_reg_rtx (tmode);
13866 pat = GEN_FCN (icode) (target, op0, op1);
/* movhps/movlps/movhpd/movlpd stores: memory destination, so the same
   pattern is used with a MEM as operand 0 (op0 repeated to satisfy the
   match_dup in the pattern).  */
13872 case IX86_BUILTIN_STOREHPS:
13873 case IX86_BUILTIN_STORELPS:
13874 case IX86_BUILTIN_STOREHPD:
13875 case IX86_BUILTIN_STORELPD:
13876 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
13877 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
13878 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
13879 : CODE_FOR_sse2_movlpd);
13880 arg0 = TREE_VALUE (arglist);
13881 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13882 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13883 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13884 mode0 = insn_data[icode].operand[1].mode;
13885 mode1 = insn_data[icode].operand[2].mode;
13887 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13888 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13889 op1 = copy_to_mode_reg (mode1, op1);
13891 pat = GEN_FCN (icode) (op0, op0, op1);
13897 case IX86_BUILTIN_MOVNTPS:
13898 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
13899 case IX86_BUILTIN_MOVNTQ:
13900 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
/* MXCSR access goes through a stack slot since ldmxcsr/stmxcsr only
   take memory operands.  */
13902 case IX86_BUILTIN_LDMXCSR:
13903 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13904 target = assign_386_stack_local (SImode, 0);
13905 emit_move_insn (target, op0);
13906 emit_insn (gen_ldmxcsr (target));
13909 case IX86_BUILTIN_STMXCSR:
13910 target = assign_386_stack_local (SImode, 0);
13911 emit_insn (gen_stmxcsr (target));
13912 return copy_to_mode_reg (SImode, target);
/* shufps/shufpd: third operand must be an immediate mask.  */
13914 case IX86_BUILTIN_SHUFPS:
13915 case IX86_BUILTIN_SHUFPD:
13916 icode = (fcode == IX86_BUILTIN_SHUFPS
13917 ? CODE_FOR_sse_shufps
13918 : CODE_FOR_sse2_shufpd);
13919 arg0 = TREE_VALUE (arglist);
13920 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13921 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13922 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13923 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13924 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13925 tmode = insn_data[icode].operand[0].mode;
13926 mode0 = insn_data[icode].operand[1].mode;
13927 mode1 = insn_data[icode].operand[2].mode;
13928 mode2 = insn_data[icode].operand[3].mode;
13930 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13931 op0 = copy_to_mode_reg (mode0, op0);
13932 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13933 op1 = copy_to_mode_reg (mode1, op1);
13934 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13936 /* @@@ better error message */
13937 error ("mask must be an immediate");
13938 return gen_reg_rtx (tmode);
13941 || GET_MODE (target) != tmode
13942 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13943 target = gen_reg_rtx (tmode);
13944 pat = GEN_FCN (icode) (target, op0, op1, op2);
/* pshufw/pshufd/pshufhw/pshuflw: source + immediate mask.  */
13950 case IX86_BUILTIN_PSHUFW:
13951 case IX86_BUILTIN_PSHUFD:
13952 case IX86_BUILTIN_PSHUFHW:
13953 case IX86_BUILTIN_PSHUFLW:
13954 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
13955 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
13956 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
13957 : CODE_FOR_mmx_pshufw);
13958 arg0 = TREE_VALUE (arglist);
13959 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13960 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13961 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13962 tmode = insn_data[icode].operand[0].mode;
13963 mode1 = insn_data[icode].operand[1].mode;
13964 mode2 = insn_data[icode].operand[2].mode;
13966 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13967 op0 = copy_to_mode_reg (mode1, op0);
13968 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13970 /* @@@ better error message */
13971 error ("mask must be an immediate");
13975 || GET_MODE (target) != tmode
13976 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13977 target = gen_reg_rtx (tmode);
13978 pat = GEN_FCN (icode) (target, op0, op1);
/* Whole-register byte shifts (pslldq/psrldq), done in TImode with a
   V2DImode wrapper for the user-visible type.  */
13984 case IX86_BUILTIN_PSLLDQI128:
13985 case IX86_BUILTIN_PSRLDQI128:
13986 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
13987 : CODE_FOR_sse2_lshrti3);
13988 arg0 = TREE_VALUE (arglist);
13989 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13990 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13991 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13992 tmode = insn_data[icode].operand[0].mode;
13993 mode1 = insn_data[icode].operand[1].mode;
13994 mode2 = insn_data[icode].operand[2].mode;
13996 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13998 op0 = copy_to_reg (op0);
13999 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14001 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14003 error ("shift must be an immediate");
14006 target = gen_reg_rtx (V2DImode);
14007 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
/* 3DNow! builtins, all table-free direct expansions.  */
14013 case IX86_BUILTIN_FEMMS:
14014 emit_insn (gen_femms ());
14017 case IX86_BUILTIN_PAVGUSB:
14018 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
14020 case IX86_BUILTIN_PF2ID:
14021 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
14023 case IX86_BUILTIN_PFACC:
14024 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
14026 case IX86_BUILTIN_PFADD:
14027 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
14029 case IX86_BUILTIN_PFCMPEQ:
14030 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
14032 case IX86_BUILTIN_PFCMPGE:
14033 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
14035 case IX86_BUILTIN_PFCMPGT:
14036 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
14038 case IX86_BUILTIN_PFMAX:
14039 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
14041 case IX86_BUILTIN_PFMIN:
14042 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
14044 case IX86_BUILTIN_PFMUL:
14045 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
14047 case IX86_BUILTIN_PFRCP:
14048 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
14050 case IX86_BUILTIN_PFRCPIT1:
14051 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
14053 case IX86_BUILTIN_PFRCPIT2:
14054 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
14056 case IX86_BUILTIN_PFRSQIT1:
14057 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
14059 case IX86_BUILTIN_PFRSQRT:
14060 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
14062 case IX86_BUILTIN_PFSUB:
14063 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
14065 case IX86_BUILTIN_PFSUBR:
14066 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
14068 case IX86_BUILTIN_PI2FD:
14069 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
14071 case IX86_BUILTIN_PMULHRW:
14072 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
14074 case IX86_BUILTIN_PF2IW:
14075 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
14077 case IX86_BUILTIN_PFNACC:
14078 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
14080 case IX86_BUILTIN_PFPNACC:
14081 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
14083 case IX86_BUILTIN_PI2FW:
14084 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
14086 case IX86_BUILTIN_PSWAPDSI:
14087 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
14089 case IX86_BUILTIN_PSWAPDSF:
14090 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
/* Register-clearing builtins.  */
14092 case IX86_BUILTIN_SSE_ZERO:
14093 target = gen_reg_rtx (V4SFmode);
14094 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
14097 case IX86_BUILTIN_MMX_ZERO:
14098 target = gen_reg_rtx (DImode);
14099 emit_insn (gen_mmx_clrdi (target));
14102 case IX86_BUILTIN_CLRTI:
14103 target = gen_reg_rtx (V2DImode);
14104 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
/* SSE2 scalar/vector load-store family.  */
14108 case IX86_BUILTIN_SQRTSD:
14109 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14110 case IX86_BUILTIN_LOADAPD:
14111 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14112 case IX86_BUILTIN_LOADUPD:
14113 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14115 case IX86_BUILTIN_STOREAPD:
14116 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14117 case IX86_BUILTIN_STOREUPD:
14118 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14120 case IX86_BUILTIN_LOADSD:
14121 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14123 case IX86_BUILTIN_STORESD:
14124 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
/* setpd1/setpd build the vector through a stack slot, then load and
   (for setpd1) broadcast with shufpd 0.  */
14126 case IX86_BUILTIN_SETPD1:
14127 target = assign_386_stack_local (DFmode, 0);
14128 arg0 = TREE_VALUE (arglist);
14129 emit_move_insn (adjust_address (target, DFmode, 0),
14130 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14131 op0 = gen_reg_rtx (V2DFmode);
14132 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
14133 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
14136 case IX86_BUILTIN_SETPD:
14137 target = assign_386_stack_local (V2DFmode, 0);
14138 arg0 = TREE_VALUE (arglist);
14139 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14140 emit_move_insn (adjust_address (target, DFmode, 0),
14141 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14142 emit_move_insn (adjust_address (target, DFmode, 8),
14143 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14144 op0 = gen_reg_rtx (V2DFmode);
14145 emit_insn (gen_sse2_movapd (op0, target));
/* loadrpd reverses with shufpd 1; loadpd1 broadcasts with shufpd 0.  */
14148 case IX86_BUILTIN_LOADRPD:
14149 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14150 gen_reg_rtx (V2DFmode), 1);
14151 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
14154 case IX86_BUILTIN_LOADPD1:
14155 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14156 gen_reg_rtx (V2DFmode), 1);
14157 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14160 case IX86_BUILTIN_STOREPD1:
14161 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14162 case IX86_BUILTIN_STORERPD:
14163 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14165 case IX86_BUILTIN_CLRPD:
14166 target = gen_reg_rtx (V2DFmode);
14167 emit_insn (gen_sse_clrv2df (target));
/* SSE2 fences and cache control.  */
14170 case IX86_BUILTIN_MFENCE:
14171 emit_insn (gen_sse2_mfence ());
14173 case IX86_BUILTIN_LFENCE:
14174 emit_insn (gen_sse2_lfence ());
14177 case IX86_BUILTIN_CLFLUSH:
14178 arg0 = TREE_VALUE (arglist);
14179 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14180 icode = CODE_FOR_sse2_clflush;
14181 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14182 op0 = copy_to_mode_reg (Pmode, op0);
14184 emit_insn (gen_sse2_clflush (op0));
14187 case IX86_BUILTIN_MOVNTPD:
14188 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14189 case IX86_BUILTIN_MOVNTDQ:
14190 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14191 case IX86_BUILTIN_MOVNTI:
14192 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14194 case IX86_BUILTIN_LOADDQA:
14195 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14196 case IX86_BUILTIN_LOADDQU:
14197 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14198 case IX86_BUILTIN_LOADD:
14199 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14201 case IX86_BUILTIN_STOREDQA:
14202 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14203 case IX86_BUILTIN_STOREDQU:
14204 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14205 case IX86_BUILTIN_STORED:
14206 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
/* Fallthrough: table-driven expansion.  Compares get the dedicated
   mask-compare path; everything else uses the generic binop helper.  */
14212 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14213 if (d->code == fcode)
14215 /* Compares are treated specially. */
14216 if (d->icode == CODE_FOR_maskcmpv4sf3
14217 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14218 || d->icode == CODE_FOR_maskncmpv4sf3
14219 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14220 || d->icode == CODE_FOR_maskcmpv2df3
14221 || d->icode == CODE_FOR_vmmaskcmpv2df3
14222 || d->icode == CODE_FOR_maskncmpv2df3
14223 || d->icode == CODE_FOR_vmmaskncmpv2df3)
14224 return ix86_expand_sse_compare (d, arglist, target);
14226 return ix86_expand_binop_builtin (d->icode, arglist, target);
14229 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14230 if (d->code == fcode)
14231 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14233 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14234 if (d->code == fcode)
14235 return ix86_expand_sse_comi (d, arglist, target);
14237 /* @@@ Should really do something sensible here. */
14241 /* Store OPERAND to the memory after reload is completed. This means
14242 that we can't easily use assign_stack_local. */
/* Returns a MEM holding OPERAND.  On 64-bit with a red zone the value
   is stored below the stack pointer; otherwise it is pushed, and the
   matching pops happen in ix86_free_from_memory.
   NOTE(review): the return type, an abort on !reload_completed, several
   braces and the DImode/HImode push bodies are elided from this
   extraction.  Comments only added; code bytes untouched.  */
14244 ix86_force_to_memory (mode, operand)
14245 enum machine_mode mode;
14249 if (!reload_completed)
/* Red-zone path: no stack adjustment needed, store at sp - RED_ZONE_SIZE.  */
14251 if (TARGET_64BIT && TARGET_RED_ZONE)
14253 result = gen_rtx_MEM (mode,
14254 gen_rtx_PLUS (Pmode,
14256 GEN_INT (-RED_ZONE_SIZE)));
14257 emit_move_insn (result, operand);
/* No red zone on 64-bit: push the DImode view of the operand.  */
14259 else if (TARGET_64BIT && !TARGET_RED_ZONE)
14265 operand = gen_lowpart (DImode, operand);
14269 gen_rtx_SET (VOIDmode,
14270 gen_rtx_MEM (DImode,
14271 gen_rtx_PRE_DEC (DImode,
14272 stack_pointer_rtx)),
14278 result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* 32-bit DImode: split into two SImode words and push both halves.  */
14287 split_di (&operand, 1, operands, operands + 1);
14289 gen_rtx_SET (VOIDmode,
14290 gen_rtx_MEM (SImode,
14291 gen_rtx_PRE_DEC (Pmode,
14292 stack_pointer_rtx)),
14295 gen_rtx_SET (VOIDmode,
14296 gen_rtx_MEM (SImode,
14297 gen_rtx_PRE_DEC (Pmode,
14298 stack_pointer_rtx)),
14303 /* It is better to store HImodes as SImodes. */
14304 if (!TARGET_PARTIAL_REG_STALL)
14305 operand = gen_lowpart (SImode, operand);
14309 gen_rtx_SET (VOIDmode,
14310 gen_rtx_MEM (GET_MODE (operand),
14311 gen_rtx_PRE_DEC (SImode,
14312 stack_pointer_rtx)),
14318 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14323 /* Free operand from the memory. */
/* Deallocates the stack slot created by ix86_force_to_memory.  When the
   red zone was used (64-bit + TARGET_RED_ZONE) nothing was pushed, so
   nothing needs freeing -- hence the guard below.
   NOTE(review): listing is elided (line numbers jump); size computation
   for the elided branches must be restored from the full file.  */
14325 ix86_free_from_memory (mode)
14326 enum machine_mode mode;
14328 if (!TARGET_64BIT || !TARGET_RED_ZONE)
/* DImode (or any mode on 64-bit) occupied a full 8-byte slot; HImode was
   widened to SImode unless partial-register stalls are a concern.  */
14332 if (mode == DImode || TARGET_64BIT)
14334 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14338 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14339 to pop or add instruction if registers are available. */
14340 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14341 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14346 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14347 QImode must go into class Q_REGS.
14348 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
14349 movdf to do mem-to-mem moves through integer regs. */
/* NOTE(review): listing is elided (line numbers jump); the return type,
   several return statements and braces are missing here.  */
14351 ix86_preferred_reload_class (x, class)
14353 enum reg_class class;
/* Nonzero constant vectors cannot be loaded directly into any class.  */
14355 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14357 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14359 /* SSE can't load any constant directly yet. */
14360 if (SSE_CLASS_P (class))
14362 /* Floats can load 0 and 1. */
14363 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14365 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14366 if (MAYBE_SSE_CLASS_P (class))
14367 return (reg_class_subset_p (class, GENERAL_REGS)
14368 ? GENERAL_REGS : FLOAT_REGS);
14372 /* General regs can load everything. */
14373 if (reg_class_subset_p (class, GENERAL_REGS))
14374 return GENERAL_REGS;
14375 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14376 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
/* MMX registers cannot take any constants directly.  */
14379 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
/* QImode values need a class reachable through the byte registers.  */
14381 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14386 /* If we are copying between general and FP registers, we need a memory
14387 location. The same is true for SSE and MMX registers.
14389 The macro can't work reliably when one of the CLASSES is class containing
14390 registers from multiple units (SSE, MMX, integer). We avoid this by never
14391 combining those units in single alternative in the machine description.
14392 Ensure that this constraint holds to avoid unexpected surprises.
14394 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14395 enforce these sanity checks. */
/* NOTE(review): listing is elided (line numbers jump); return type and the
   strict-mode abort path are not visible here.  */
14397 ix86_secondary_memory_needed (class1, class2, mode, strict)
14398 enum reg_class class1, class2;
14399 enum machine_mode mode;
/* Sanity check: each class must be purely one unit (FP, SSE, MMX) or
   purely not -- mixed classes make the answer ill-defined.  */
14402 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14403 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14404 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14405 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14406 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14407 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
/* Memory is needed for FP<->non-FP moves always, and for SSE/MMX<->integer
   moves unless the mode fits a direct inter-unit move and such moves are
   enabled (or we are optimizing for size).  */
14414 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14415 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14416 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14417 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14418 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
14420 /* Return the cost of moving data from a register in class CLASS1 to
14421 one in class CLASS2.
14423 It is not required that the cost always equal 2 when FROM is the same as TO;
14424 on some machines it is expensive to move between registers if they are not
14425 general registers. */
/* NOTE(review): listing is elided (line numbers jump); return type, the
   `cost` declaration and some return statements are missing here.  */
14427 ix86_register_move_cost (mode, class1, class2)
14428 enum machine_mode mode;
14429 enum reg_class class1, class2;
14431 /* In case we require secondary memory, compute cost of the store followed
14432 by load. In order to avoid bad register allocation choices, we need
14433 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
/* STRICT==0: called from a cost query, skip sanity aborts.  */
14435 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14439 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14440 MEMORY_MOVE_COST (mode, class1, 1));
14441 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14442 MEMORY_MOVE_COST (mode, class2, 1));
14444 /* In case of copying from general_purpose_register we may emit multiple
14445 stores followed by single load causing memory size mismatch stall.
14446 Count this as arbitrarily high cost of 20. */
14447 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14450 /* In the case of FP/MMX moves, the registers actually overlap, and we
14451 have to switch modes in order to treat them differently. */
14452 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14453 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14459 /* Moves between SSE/MMX and integer unit are expensive. */
14460 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14461 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14462 return ix86_cost->mmxsse_to_integer;
/* Same-unit moves: use the per-unit move costs from the active cost
   table (ix86_cost points at the current tuning's processor_costs).  */
14463 if (MAYBE_FLOAT_CLASS_P (class1))
14464 return ix86_cost->fp_move;
14465 if (MAYBE_SSE_CLASS_P (class1))
14466 return ix86_cost->sse_move;
14467 if (MAYBE_MMX_CLASS_P (class1))
14468 return ix86_cost->mmx_move;
14472 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
/* NOTE(review): listing is elided (line numbers jump); the return type and
   several `return 0/1` lines are not visible here.  */
14474 ix86_hard_regno_mode_ok (regno, mode)
14476 enum machine_mode mode;
14478 /* Flags and only flags can only hold CCmode values. */
14479 if (CC_REGNO_P (regno))
14480 return GET_MODE_CLASS (mode) == MODE_CC;
/* Non-flags registers reject CC/RANDOM/PARTIAL_INT modes outright.  */
14481 if (GET_MODE_CLASS (mode) == MODE_CC
14482 || GET_MODE_CLASS (mode) == MODE_RANDOM
14483 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
/* Each register bank delegates to its own mode-validity predicate.  */
14485 if (FP_REGNO_P (regno))
14486 return VALID_FP_MODE_P (mode);
14487 if (SSE_REGNO_P (regno))
14488 return VALID_SSE_REG_MODE (mode);
14489 if (MMX_REGNO_P (regno))
14490 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
14491 /* We handle both integer and floats in the general purpose registers.
14492 In future we should be able to handle vector modes as well. */
14493 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14495 /* Take care for QImode values - they can be in non-QI regs, but then
14496 they do cause partial register stalls. */
14497 if (regno < 4 || mode != QImode || TARGET_64BIT)
/* Allow QImode in non-byte regs during/after reload, or whenever the
   target doesn't suffer partial-register stalls.  */
14499 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14502 /* Return the cost of moving data of mode M between a
14503 register and memory. A value of 2 is the default; this cost is
14504 relative to those in `REGISTER_MOVE_COST'.
14506 If moving between registers and memory is more expensive than
14507 between two registers, you should define this macro to express the
14510 Model also increased moving costs of QImode registers in non
/* NOTE(review): listing is heavily elided (line numbers jump); the
   `index` computation and the switch case labels/defaults are missing
   and must be restored from the full file.  */
14514 ix86_memory_move_cost (mode, class, in)
14515 enum machine_mode mode;
14516 enum reg_class class;
/* x87 classes: index the fp_load/fp_store cost tables by mode size.  */
14519 if (FLOAT_CLASS_P (class))
14537 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14539 if (SSE_CLASS_P (class))
14542 switch (GET_MODE_SIZE (mode))
14556 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14558 if (MMX_CLASS_P (class))
14561 switch (GET_MODE_SIZE (mode))
14572 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
/* Integer classes: QImode costs depend on whether the class is limited
   to the byte-addressable registers (Q_CLASS_P).  */
14574 switch (GET_MODE_SIZE (mode))
14578 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14579 : ix86_cost->movzbl_load);
14581 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14582 : ix86_cost->int_store[0] + 4);
14585 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14587 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14588 if (mode == TFmode)
14590 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14591 * ((int) GET_MODE_SIZE (mode)
14592 + UNITS_PER_WORD -1 ) / UNITS_PER_WORD);
14596 /* Compute a (partial) cost for rtx X. Return true if the complete
14597 cost has been computed, and false if subexpressions should be
14598 scanned. In either case, *TOTAL contains the cost result. */
/* NOTE(review): this listing is heavily elided (embedded line numbers
   jump); the switch statement, its case labels, braces, and many return
   statements are missing.  The comments below only annotate the visible
   fragments -- restore the full file before compiling.  */
14601 ix86_rtx_costs (x, code, outer_code, total)
14603 int code, outer_code;
14606 enum machine_mode mode = GET_MODE (x);
/* Constant operands: 64-bit immediates that don't fit sign/zero-extended
   32-bit forms are costlier, as are PIC symbolic constants.  */
14614 if (TARGET_64BIT && !x86_64_sign_extended_value (x))
14616 else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
14618 else if (flag_pic && SYMBOLIC_CONST (x))
14625 if (mode == VOIDmode)
/* FP constants: check whether the 80387 can materialize them directly
   (fldz/fld1 etc.); otherwise they come from the constant pool.  */
14628 switch (standard_80387_constant_p (x))
14637 /* Start with (MEM (SYMBOL_REF)), since that's where
14638 it'll probably end up. Add a penalty for size. */
14639 *total = (COSTS_N_INSNS (1)
14641 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
14647 /* The zero extensions is often completely free on x86_64, so make
14648 it as cheap as possible. */
14649 if (TARGET_64BIT && mode == DImode
14650 && GET_MODE (XEXP (x, 0)) == SImode)
14652 else if (TARGET_ZERO_EXTEND_WITH_AND)
14653 *total = COSTS_N_INSNS (ix86_cost->add);
14655 *total = COSTS_N_INSNS (ix86_cost->movzx);
14659 *total = COSTS_N_INSNS (ix86_cost->movsx);
/* Shift by constant: small left shifts may be cheaper as LEA.  */
14663 if (GET_CODE (XEXP (x, 1)) == CONST_INT
14664 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
14666 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14669 *total = COSTS_N_INSNS (ix86_cost->add);
14672 if ((value == 2 || value == 3)
14673 && !TARGET_DECOMPOSE_LEA
14674 && ix86_cost->lea <= ix86_cost->shift_const)
14676 *total = COSTS_N_INSNS (ix86_cost->lea);
/* 32-bit DImode shifts are synthesized from multiple SImode shifts.  */
14686 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
14688 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14690 if (INTVAL (XEXP (x, 1)) > 32)
14691 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
14693 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
14697 if (GET_CODE (XEXP (x, 1)) == AND)
14698 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
14700 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
14705 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14706 *total = COSTS_N_INSNS (ix86_cost->shift_const);
14708 *total = COSTS_N_INSNS (ix86_cost->shift_var);
/* MULT: multiply-by-constant cost scales with the popcount of the
   constant on top of the per-mode init cost.  */
14713 if (FLOAT_MODE_P (mode))
14714 *total = COSTS_N_INSNS (ix86_cost->fmul);
14715 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14717 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14720 for (nbits = 0; value != 0; value >>= 1)
14723 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
14724 + nbits * ix86_cost->mult_bit);
14728 /* This is arbitrary */
14729 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
14730 + 7 * ix86_cost->mult_bit);
14738 if (FLOAT_MODE_P (mode))
14739 *total = COSTS_N_INSNS (ix86_cost->fdiv);
14741 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
/* PLUS: patterns of the form reg + reg*{2,4,8} + const map to LEA.  */
14745 if (FLOAT_MODE_P (mode))
14746 *total = COSTS_N_INSNS (ix86_cost->fadd);
14747 else if (!TARGET_DECOMPOSE_LEA
14748 && GET_MODE_CLASS (mode) == MODE_INT
14749 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
14751 if (GET_CODE (XEXP (x, 0)) == PLUS
14752 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14753 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
14754 && CONSTANT_P (XEXP (x, 1)))
14756 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
14757 if (val == 2 || val == 4 || val == 8)
14759 *total = COSTS_N_INSNS (ix86_cost->lea);
14760 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
14761 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
14763 *total += rtx_cost (XEXP (x, 1), outer_code);
14767 else if (GET_CODE (XEXP (x, 0)) == MULT
14768 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
14770 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
14771 if (val == 2 || val == 4 || val == 8)
14773 *total = COSTS_N_INSNS (ix86_cost->lea);
14774 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
14775 *total += rtx_cost (XEXP (x, 1), outer_code);
14779 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14781 *total = COSTS_N_INSNS (ix86_cost->lea);
14782 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
14783 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
14784 *total += rtx_cost (XEXP (x, 1), outer_code);
14791 if (FLOAT_MODE_P (mode))
14793 *total = COSTS_N_INSNS (ix86_cost->fadd);
/* 32-bit DImode add/sub: two adds plus any extension cost of operands
   not already in DImode (the shift doubles the subexpression cost).  */
14801 if (!TARGET_64BIT && mode == DImode)
14803 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
14804 + (rtx_cost (XEXP (x, 0), outer_code)
14805 << (GET_MODE (XEXP (x, 0)) != DImode))
14806 + (rtx_cost (XEXP (x, 1), outer_code)
14807 << (GET_MODE (XEXP (x, 1)) != DImode)));
14813 if (FLOAT_MODE_P (mode))
14815 *total = COSTS_N_INSNS (ix86_cost->fchs);
14821 if (!TARGET_64BIT && mode == DImode)
14822 *total = COSTS_N_INSNS (ix86_cost->add * 2);
14824 *total = COSTS_N_INSNS (ix86_cost->add);
14828 if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
14833 if (FLOAT_MODE_P (mode))
14834 *total = COSTS_N_INSNS (ix86_cost->fabs);
14838 if (FLOAT_MODE_P (mode))
14839 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
14847 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
/* SVR3-style constructor emission: push the constructor's address so the
   init-section epilogue can call it.  PRIORITY is ignored on this target.
   NOTE(review): listing is elided (line numbers jump); the return type,
   the `rtx symbol` parameter declaration and the closing brace are not
   visible here.  */
14849 ix86_svr3_asm_out_constructor (symbol, priority)
14851 int priority ATTRIBUTE_UNUSED;
14854 fputs ("\tpushl $", asm_out_file);
14855 assemble_name (asm_out_file, XSTR (symbol, 0));
14856 fputc ('\n', asm_out_file);
/* Monotonic counter used to generate unique local labels (L<n>$lz,
   LPC$<n>) for each emitted Mach-O stub.  */
14862 static int current_machopic_label_num;
14864 /* Given a symbol name and its associated stub, write out the
14865 definition of the stub. */
/* NOTE(review): listing is elided (line numbers jump); the surrounding
   TARGET_MACHO conditional, the MACHOPIC_PURE/indirect branches' if/else
   keywords and some statements are missing from this view.  */
14868 machopic_output_stub (file, symb, stub)
14870 const char *symb, *stub;
14872 unsigned int length;
14873 char *binder_name, *symbol_name, lazy_ptr_name[32];
14874 int label = ++current_machopic_label_num;
14876 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
14877 symb = (*targetm.strip_name_encoding) (symb);
14879 length = strlen (stub);
14880 binder_name = alloca (length + 32);
14881 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
14883 length = strlen (symb);
14884 symbol_name = alloca (length + 32);
14885 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
14887 sprintf (lazy_ptr_name, "L%d$lz", label);
/* Pick the stub section: pic-symbol stubs for MACHOPIC_PURE, plain
   symbol stubs otherwise -- the selecting condition is elided here.  */
14890 machopic_picsymbol_stub_section ();
14892 machopic_symbol_stub_section ();
14894 fprintf (file, "%s:\n", stub);
14895 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
/* PIC form: materialize the PC in %eax via call/pop, then jump through
   the lazy pointer relative to it.  */
14899 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
14900 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
14901 fprintf (file, "\tjmp %%edx\n");
14904 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
/* Binder: push the lazy-pointer address and enter dyld's binding helper,
   which resolves the symbol on first use.  */
14906 fprintf (file, "%s:\n", binder_name);
14910 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
14911 fprintf (file, "\tpushl %%eax\n");
14914 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
14916 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
/* Lazy pointer slot: initially points at the binder; dyld rewrites it to
   the resolved symbol after the first call.  */
14918 machopic_lazy_symbol_ptr_section ();
14919 fprintf (file, "%s:\n", lazy_ptr_name);
14920 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14921 fprintf (file, "\t.long %s\n", binder_name);
14925 /* Order the registers for register allocator. */
/* Fills reg_alloc_order[] with a preference order: call-clobbered GPRs
   first, then call-saved GPRs, then x87/SSE/MMX depending on whether SSE
   math is in use.  NOTE(review): listing is elided (line numbers jump);
   the declarations of `i`/`pos` and the closing brace are not visible.  */
14928 x86_order_regs_for_local_alloc ()
14933 /* First allocate the local general purpose registers. */
14934 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14935 if (GENERAL_REGNO_P (i) && call_used_regs[i])
14936 reg_alloc_order [pos++] = i;
14938 /* Global general purpose registers. */
14939 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14940 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
14941 reg_alloc_order [pos++] = i;
14943 /* x87 registers come first in case we are doing FP math
/* ... (continuation of the comment is elided in this listing).  */
14945 if (!TARGET_SSE_MATH)
14946 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14947 reg_alloc_order [pos++] = i;
14949 /* SSE registers. */
14950 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
14951 reg_alloc_order [pos++] = i;
14952 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
14953 reg_alloc_order [pos++] = i;
14955 /* x87 registers. */
14956 if (TARGET_SSE_MATH)
14957 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14958 reg_alloc_order [pos++] = i;
14960 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
14961 reg_alloc_order [pos++] = i;
14963 /* Initialize the rest of array as we do not allocate some registers
/* Pad the remainder with register 0 so every slot is initialized.  */
14965 while (pos < FIRST_PSEUDO_REGISTER)
14966 reg_alloc_order [pos++] = 0;
14969 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
14970 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
14973 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
14974 struct attribute_spec.handler. */
/* NOTE(review): listing is elided (line numbers jump); return type, the
   `node`/`name` parameter declarations, the `type` local and the final
   return are not visible here.  */
14976 ix86_handle_struct_attribute (node, name, args, flags, no_add_attrs)
14979 tree args ATTRIBUTE_UNUSED;
14980 int flags ATTRIBUTE_UNUSED;
14981 bool *no_add_attrs;
/* For a TYPE_DECL, the attribute applies to the declared type; the
   elided else-branches presumably handle other decls/types directly --
   confirm against the full source.  */
14984 if (DECL_P (*node))
14986 if (TREE_CODE (*node) == TYPE_DECL)
14987 type = &TREE_TYPE (*node);
/* Only struct (RECORD_TYPE) and union types may carry these layout
   attributes; anything else gets a warning and the attribute dropped.  */
14992 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
14993 || TREE_CODE (*type) == UNION_TYPE)))
14995 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
14996 *no_add_attrs = true;
/* ms_struct and gcc_struct are mutually exclusive on the same type.  */
14999 else if ((is_attribute_p ("ms_struct", name)
15000 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15001 || ((is_attribute_p ("gcc_struct", name)
15002 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15004 warning ("`%s' incompatible attribute ignored",
15005 IDENTIFIER_POINTER (name));
15006 *no_add_attrs = true;
/* Return nonzero if RECORD_TYPE should use MS-compatible bitfield layout:
   either the target default requests it (and the type is not marked
   gcc_struct), or the type is explicitly marked ms_struct.
   NOTE(review): listing is elided; return type and the `record_type`
   parameter declaration are not visible here.  */
15013 ix86_ms_bitfield_layout_p (record_type)
15016 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
15017 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
15018 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
15021 /* Returns an expression indicating where the this parameter is
15022 located on entry to the FUNCTION. */
/* NOTE(review): listing is elided (line numbers jump); return type, the
   `function` parameter declaration and the 64-bit guard are missing.  */
15025 x86_this_parameter (function)
15028 tree type = TREE_TYPE (function);
/* 64-bit: `this` is in the first integer argument register, shifted to
   the second if a hidden aggregate-return pointer occupies the first.  */
15032 int n = aggregate_value_p (TREE_TYPE (type)) != 0;
15033 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
/* 32-bit regparm: `this` arrives in a register only for non-variadic
   functions (varargs force stack passing).  */
15036 if (ix86_fntype_regparm (type) > 0)
15040 parm = TYPE_ARG_TYPES (type);
15041 /* Figure out whether or not the function has a variable number of
15043 for (; parm; parm = TREE_CHAIN (parm))
15044 if (TREE_VALUE (parm) == void_type_node)
15046 /* If not, the this parameter is in %eax. */
15048 return gen_rtx_REG (SImode, 0);
/* Stack case: `this` sits above the return address, one slot further if
   a hidden aggregate-return pointer precedes it.  */
15051 if (aggregate_value_p (TREE_TYPE (type)))
15052 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
15054 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
15057 /* Determine whether x86_output_mi_thunk can succeed. */
/* NOTE(review): listing is elided (line numbers jump); return type, the
   `function` parameter declaration and the `return true/false` lines are
   not visible here.  */
15060 x86_can_output_mi_thunk (thunk, delta, vcall_offset, function)
15061 tree thunk ATTRIBUTE_UNUSED;
15062 HOST_WIDE_INT delta ATTRIBUTE_UNUSED;
15063 HOST_WIDE_INT vcall_offset;
15066 /* 64-bit can handle anything. */
15070 /* For 32-bit, everything's fine if we have one free register. */
15071 if (ix86_fntype_regparm (TREE_TYPE (function)) < 3)
15074 /* Need a free register for vcall_offset. */
15078 /* Need a free register for GOT references. */
15079 if (flag_pic && !(*targetm.binds_local_p) (function))
15082 /* Otherwise ok. */
15086 /* Output the assembler code for a thunk function. THUNK_DECL is the
15087 declaration for the thunk function itself, FUNCTION is the decl for
15088 the target function. DELTA is an immediate constant offset to be
15089 added to THIS. If VCALL_OFFSET is nonzero, the word at
15090 *(*this + vcall_offset) should be added to THIS. */
/* NOTE(review): this listing is heavily elided (embedded line numbers
   jump); return type, several if/else keywords, braces, the xops[]
   declaration and multiple statements are missing -- restore from the
   full file before compiling.  Comments below annotate the visible
   fragments only.  */
15093 x86_output_mi_thunk (file, thunk, delta, vcall_offset, function)
15094 FILE *file ATTRIBUTE_UNUSED;
15095 tree thunk ATTRIBUTE_UNUSED;
15096 HOST_WIDE_INT delta;
15097 HOST_WIDE_INT vcall_offset;
15101 rtx this = x86_this_parameter (function);
15104 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15105 pull it in now and let DELTA benefit. */
15108 else if (vcall_offset)
15110 /* Put the this parameter into %eax. */
15112 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15113 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15116 this_reg = NULL_RTX;
15118 /* Adjust the this parameter by a fixed constant. */
15121 xops[0] = GEN_INT (delta);
15122 xops[1] = this_reg ? this_reg : this;
/* 64-bit: a delta that isn't a valid immediate must be loaded into the
   scratch register R10 first.  */
15125 if (!x86_64_general_operand (xops[0], DImode))
15127 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15129 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15133 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15136 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15139 /* Adjust the this parameter by a value stored in the vtable. */
/* Scratch register for the vtable pointer: R10 on 64-bit, %ecx on
   32-bit.  */
15143 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15145 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15147 xops[0] = gen_rtx_MEM (Pmode, this_reg);
15150 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15152 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15154 /* Adjust the this parameter. */
15155 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
/* 64-bit: if vcall_offset doesn't form a valid address, load it into
   R11 and use an indexed address instead.  */
15156 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15158 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15159 xops[0] = GEN_INT (vcall_offset);
15161 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15162 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
15164 xops[1] = this_reg;
15166 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15168 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15171 /* If necessary, drop THIS back to its stack slot. */
15172 if (this_reg && this_reg != this)
15174 xops[0] = this_reg;
15176 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
/* Tail-jump to the real function: direct jump when the target binds
   locally, otherwise an indirect jump through the GOT (64-bit GOTPCREL
   shown here; the 32-bit and Mach-O paths follow).  */
15179 xops[0] = DECL_RTL (function);
15182 if (!flag_pic || (*targetm.binds_local_p) (function))
15183 output_asm_insn ("jmp\t%P0", xops);
15186 tmp = XEXP (xops[0], 0);
15187 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, tmp), UNSPEC_GOTPCREL);
15188 tmp = gen_rtx_CONST (Pmode, tmp);
15189 tmp = gen_rtx_MEM (QImode, tmp);
15191 output_asm_insn ("jmp\t%A0", xops);
15196 if (!flag_pic || (*targetm.binds_local_p) (function))
15197 output_asm_insn ("jmp\t%P0", xops);
/* Darwin/Mach-O PIC: jump through a machopic stub for the function.  */
15202 char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
15203 tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
15204 tmp = gen_rtx_MEM (QImode, tmp);
15206 output_asm_insn ("jmp\t%0", xops);
15209 #endif /* TARGET_MACHO */
/* 32-bit ELF PIC: set up the GOT pointer in %ecx, then jump through the
   function's GOT entry.  */
15211 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15212 output_set_got (tmp);
15215 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15216 output_asm_insn ("jmp\t{*}%1", xops);
/* Cap the alignment of a struct FIELD at 32 bits for integer and
   double-precision types in the traditional i386 ABI (unless 64-bit or
   -malign-double is in effect).  NOTE(review): listing is elided; return
   type, the `field`/`computed` parameter declarations and the final
   return of `computed` are not visible here.  */
15222 x86_field_alignment (field, computed)
15226 enum machine_mode mode;
15227 tree type = TREE_TYPE (field);
/* 64-bit and -malign-double use the natural (computed) alignment.  */
15229 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
/* For arrays, the element type determines the alignment cap.  */
15231 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15232 ? get_inner_array_type (type) : type);
15233 if (mode == DFmode || mode == DCmode
15234 || GET_MODE_CLASS (mode) == MODE_INT
15235 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
15236 return MIN (32, computed);
15240 /* Output assembler code to FILE to increment profiler label # LABELNO
15241 for profiling a function entry. */
/* Emits the mcount call for -p/-pg.  Four visible variants: 64-bit PIC,
   64-bit non-PIC, 32-bit PIC, 32-bit non-PIC -- the if/else selecting
   among them is elided in this listing (line numbers jump), as are the
   return type and the `file` parameter declaration.  */
15243 x86_function_profiler (file, labelno)
15245 int labelno ATTRIBUTE_UNUSED;
/* 64-bit PIC: RIP-relative counter address, indirect call via GOT.  */
15250 #ifndef NO_PROFILE_COUNTERS
15251 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
15253 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
/* 64-bit non-PIC: absolute counter address, direct call.  */
15257 #ifndef NO_PROFILE_COUNTERS
15258 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15260 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
/* 32-bit PIC: GOT-relative counter via %ebx, indirect call via GOT.  */
15264 #ifndef NO_PROFILE_COUNTERS
15265 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15266 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15268 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
/* 32-bit non-PIC: absolute counter address, direct call.  */
15272 #ifndef NO_PROFILE_COUNTERS
15273 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
15274 PROFILE_COUNT_REGISTER);
15276 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15280 /* Implement machine specific optimizations.
15281 At the moment we implement single transformation: AMD Athlon works faster
15282 when RET is not destination of conditional jump or directly preceded
15283 by other jump instruction. We avoid the penalty by inserting NOP just
15284 before the RET instructions in such cases. */
/* NOTE(review): listing is elided (line numbers jump); return type,
   the `edge e` / `ret` / `prev` declarations, braces and several
   statements (including the `insert = true` assignments) are missing.  */
15286 x86_machine_dependent_reorg (first)
15287 rtx first ATTRIBUTE_UNUSED;
/* Only worthwhile on Athlon/K8 and when optimizing for speed.  */
15291 if (!TARGET_ATHLON_K8 || !optimize || optimize_size)
/* Walk every predecessor edge of the exit block; each source block may
   end in a return.  */
15293 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
15295 basic_block bb = e->src;
15298 bool insert = false;
/* Skip blocks that don't end in a return or are cold.  */
15300 if (!returnjump_p (ret) || !maybe_hot_bb_p (bb))
/* Find the nearest active insn or label before the return.  */
15302 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15303 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
/* A label right before RET means some jump targets the RET directly;
   insert the NOP if any hot non-fallthru edge reaches it.  */
15305 if (prev && GET_CODE (prev) == CODE_LABEL)
15308 for (e = bb->pred; e; e = e->pred_next)
15309 if (EDGE_FREQUENCY (e) && e->src->index >= 0
15310 && !(e->flags & EDGE_FALLTHRU))
/* A conditional jump immediately preceding RET also incurs the
   penalty.  */
15315 prev = prev_active_insn (ret);
15316 if (prev && GET_CODE (prev) == JUMP_INSN
15317 && any_condjump_p (prev))
15319 /* Empty functions get branch misspredict even when the jump destination
15320 is not visible to us. */
15321 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
15325 emit_insn_before (gen_nop (), ret);
15329 /* Return nonzero when QImode register that must be represented via REX prefix
/* (comment continuation and the function's return type / `insn`
   parameter / `i` declaration are elided in this listing).  Registers
   numbered >= 4 in QImode need a REX prefix in 64-bit code.  */
15332 x86_extended_QIreg_mentioned_p (insn)
15336 extract_insn_cached (insn);
15337 for (i = 0; i < recog_data.n_operands; i++)
15338 if (REG_P (recog_data.operand[i])
15339 && REGNO (recog_data.operand[i]) >= 4)
15344 /* Return nonzero when P points to register encoded via REX prefix.
15345 Called via for_each_rtx. */
/* NOTE(review): listing is elided; return type, the `rtx *p` parameter
   declaration and the REG_P guard before reading REGNO are not visible
   here.  */
15347 extended_reg_mentioned_1 (p, data)
15349 void *data ATTRIBUTE_UNUSED;
15351 unsigned int regno;
15354 regno = REGNO (*p);
/* R8-R15 and XMM8-XMM15 require the REX prefix.  */
15355 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15358 /* Return true when INSN mentions register that must be encoded using REX
/* (prefix -- comment continuation elided).  Walks the whole pattern with
   for_each_rtx using extended_reg_mentioned_1 as the per-rtx callback.  */
15361 x86_extended_reg_mentioned_p (insn)
15364 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15367 /* Generate an unsigned DImode to FP conversion. This is the same code
15368 optabs would emit if we didn't have TFmode patterns. */
/* NOTE(review): listing is elided (line numbers jump); the `out`
   initialization from operands[0] and some braces are not visible.  */
15371 x86_emit_floatuns (operands)
15374 rtx neglab, donelab, i0, i1, f0, in, out;
15375 enum machine_mode mode;
15378 in = force_reg (DImode, operands[1]);
15379 mode = GET_MODE (out);
15380 neglab = gen_label_rtx ();
15381 donelab = gen_label_rtx ();
15382 i1 = gen_reg_rtx (Pmode);
15383 f0 = gen_reg_rtx (mode);
/* Non-negative inputs convert directly with a signed FLOAT.  */
15385 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
15387 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15388 emit_jump_insn (gen_jump (donelab));
/* Negative (i.e. high-bit-set) inputs: halve while preserving the low
   bit ((in >> 1) | (in & 1)), convert signed, then double the result.  */
15391 emit_label (neglab);
15393 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15394 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15395 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15396 expand_float (f0, i0, 0);
15397 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15399 emit_label (donelab);
15402 #include "gt-i386.h"