1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-attr.h"
43 #include "basic-block.h"
46 #include "target-def.h"
47 #include "langhooks.h"
50 #ifndef CHECK_STACK_LIMIT
51 #define CHECK_STACK_LIMIT (-1)
54 /* Return index of given mode in mult and division cost tables. */
55 #define MODE_INDEX(mode) \
56 ((mode) == QImode ? 0 \
57 : (mode) == HImode ? 1 \
58 : (mode) == SImode ? 2 \
59 : (mode) == DImode ? 3 \
62 /* Processor costs (relative to an add) */
64 struct processor_costs size_cost = { /* costs for tunning for size */
65 2, /* cost of an add instruction */
66 3, /* cost of a lea instruction */
67 2, /* variable shift costs */
68 3, /* constant shift costs */
69 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
70 0, /* cost of multiply per each bit set */
71 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
72 3, /* cost of movsx */
73 3, /* cost of movzx */
76 2, /* cost for loading QImode using movzbl */
77 {2, 2, 2}, /* cost of loading integer registers
78 in QImode, HImode and SImode.
79 Relative to reg-reg move (2). */
80 {2, 2, 2}, /* cost of storing integer registers */
81 2, /* cost of reg,reg fld/fst */
82 {2, 2, 2}, /* cost of loading fp registers
83 in SFmode, DFmode and XFmode */
84 {2, 2, 2}, /* cost of loading integer registers */
85 3, /* cost of moving MMX register */
86 {3, 3}, /* cost of loading MMX registers
87 in SImode and DImode */
88 {3, 3}, /* cost of storing MMX registers
89 in SImode and DImode */
90 3, /* cost of moving SSE register */
91 {3, 3, 3}, /* cost of loading SSE registers
92 in SImode, DImode and TImode */
93 {3, 3, 3}, /* cost of storing SSE registers
94 in SImode, DImode and TImode */
95 3, /* MMX or SSE register to integer */
96 0, /* size of prefetch block */
97 0, /* number of parallel prefetches */
99 2, /* cost of FADD and FSUB insns. */
100 2, /* cost of FMUL instruction. */
101 2, /* cost of FDIV instruction. */
102 2, /* cost of FABS instruction. */
103 2, /* cost of FCHS instruction. */
104 2, /* cost of FSQRT instruction. */
107 /* Processor costs (relative to an add) */
109 struct processor_costs i386_cost = { /* 386 specific costs */
110 1, /* cost of an add instruction */
111 1, /* cost of a lea instruction */
112 3, /* variable shift costs */
113 2, /* constant shift costs */
114 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
115 1, /* cost of multiply per each bit set */
116 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
117 3, /* cost of movsx */
118 2, /* cost of movzx */
119 15, /* "large" insn */
121 4, /* cost for loading QImode using movzbl */
122 {2, 4, 2}, /* cost of loading integer registers
123 in QImode, HImode and SImode.
124 Relative to reg-reg move (2). */
125 {2, 4, 2}, /* cost of storing integer registers */
126 2, /* cost of reg,reg fld/fst */
127 {8, 8, 8}, /* cost of loading fp registers
128 in SFmode, DFmode and XFmode */
129 {8, 8, 8}, /* cost of loading integer registers */
130 2, /* cost of moving MMX register */
131 {4, 8}, /* cost of loading MMX registers
132 in SImode and DImode */
133 {4, 8}, /* cost of storing MMX registers
134 in SImode and DImode */
135 2, /* cost of moving SSE register */
136 {4, 8, 16}, /* cost of loading SSE registers
137 in SImode, DImode and TImode */
138 {4, 8, 16}, /* cost of storing SSE registers
139 in SImode, DImode and TImode */
140 3, /* MMX or SSE register to integer */
141 0, /* size of prefetch block */
142 0, /* number of parallel prefetches */
144 23, /* cost of FADD and FSUB insns. */
145 27, /* cost of FMUL instruction. */
146 88, /* cost of FDIV instruction. */
147 22, /* cost of FABS instruction. */
148 24, /* cost of FCHS instruction. */
149 122, /* cost of FSQRT instruction. */
153 struct processor_costs i486_cost = { /* 486 specific costs */
154 1, /* cost of an add instruction */
155 1, /* cost of a lea instruction */
156 3, /* variable shift costs */
157 2, /* constant shift costs */
158 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
159 1, /* cost of multiply per each bit set */
160 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
161 3, /* cost of movsx */
162 2, /* cost of movzx */
163 15, /* "large" insn */
165 4, /* cost for loading QImode using movzbl */
166 {2, 4, 2}, /* cost of loading integer registers
167 in QImode, HImode and SImode.
168 Relative to reg-reg move (2). */
169 {2, 4, 2}, /* cost of storing integer registers */
170 2, /* cost of reg,reg fld/fst */
171 {8, 8, 8}, /* cost of loading fp registers
172 in SFmode, DFmode and XFmode */
173 {8, 8, 8}, /* cost of loading integer registers */
174 2, /* cost of moving MMX register */
175 {4, 8}, /* cost of loading MMX registers
176 in SImode and DImode */
177 {4, 8}, /* cost of storing MMX registers
178 in SImode and DImode */
179 2, /* cost of moving SSE register */
180 {4, 8, 16}, /* cost of loading SSE registers
181 in SImode, DImode and TImode */
182 {4, 8, 16}, /* cost of storing SSE registers
183 in SImode, DImode and TImode */
184 3, /* MMX or SSE register to integer */
185 0, /* size of prefetch block */
186 0, /* number of parallel prefetches */
188 8, /* cost of FADD and FSUB insns. */
189 16, /* cost of FMUL instruction. */
190 73, /* cost of FDIV instruction. */
191 3, /* cost of FABS instruction. */
192 3, /* cost of FCHS instruction. */
193 83, /* cost of FSQRT instruction. */
197 struct processor_costs pentium_cost = {
198 1, /* cost of an add instruction */
199 1, /* cost of a lea instruction */
200 4, /* variable shift costs */
201 1, /* constant shift costs */
202 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
203 0, /* cost of multiply per each bit set */
204 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
205 3, /* cost of movsx */
206 2, /* cost of movzx */
207 8, /* "large" insn */
209 6, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {2, 2, 6}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {4, 4, 6}, /* cost of loading integer registers */
218 8, /* cost of moving MMX register */
219 {8, 8}, /* cost of loading MMX registers
220 in SImode and DImode */
221 {8, 8}, /* cost of storing MMX registers
222 in SImode and DImode */
223 2, /* cost of moving SSE register */
224 {4, 8, 16}, /* cost of loading SSE registers
225 in SImode, DImode and TImode */
226 {4, 8, 16}, /* cost of storing SSE registers
227 in SImode, DImode and TImode */
228 3, /* MMX or SSE register to integer */
229 0, /* size of prefetch block */
230 0, /* number of parallel prefetches */
232 3, /* cost of FADD and FSUB insns. */
233 3, /* cost of FMUL instruction. */
234 39, /* cost of FDIV instruction. */
235 1, /* cost of FABS instruction. */
236 1, /* cost of FCHS instruction. */
237 70, /* cost of FSQRT instruction. */
241 struct processor_costs pentiumpro_cost = {
242 1, /* cost of an add instruction */
243 1, /* cost of a lea instruction */
244 1, /* variable shift costs */
245 1, /* constant shift costs */
246 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
247 0, /* cost of multiply per each bit set */
248 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
249 1, /* cost of movsx */
250 1, /* cost of movzx */
251 8, /* "large" insn */
253 2, /* cost for loading QImode using movzbl */
254 {4, 4, 4}, /* cost of loading integer registers
255 in QImode, HImode and SImode.
256 Relative to reg-reg move (2). */
257 {2, 2, 2}, /* cost of storing integer registers */
258 2, /* cost of reg,reg fld/fst */
259 {2, 2, 6}, /* cost of loading fp registers
260 in SFmode, DFmode and XFmode */
261 {4, 4, 6}, /* cost of loading integer registers */
262 2, /* cost of moving MMX register */
263 {2, 2}, /* cost of loading MMX registers
264 in SImode and DImode */
265 {2, 2}, /* cost of storing MMX registers
266 in SImode and DImode */
267 2, /* cost of moving SSE register */
268 {2, 2, 8}, /* cost of loading SSE registers
269 in SImode, DImode and TImode */
270 {2, 2, 8}, /* cost of storing SSE registers
271 in SImode, DImode and TImode */
272 3, /* MMX or SSE register to integer */
273 32, /* size of prefetch block */
274 6, /* number of parallel prefetches */
276 3, /* cost of FADD and FSUB insns. */
277 5, /* cost of FMUL instruction. */
278 56, /* cost of FDIV instruction. */
279 2, /* cost of FABS instruction. */
280 2, /* cost of FCHS instruction. */
281 56, /* cost of FSQRT instruction. */
285 struct processor_costs k6_cost = {
286 1, /* cost of an add instruction */
287 2, /* cost of a lea instruction */
288 1, /* variable shift costs */
289 1, /* constant shift costs */
290 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
291 0, /* cost of multiply per each bit set */
292 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
293 2, /* cost of movsx */
294 2, /* cost of movzx */
295 8, /* "large" insn */
297 3, /* cost for loading QImode using movzbl */
298 {4, 5, 4}, /* cost of loading integer registers
299 in QImode, HImode and SImode.
300 Relative to reg-reg move (2). */
301 {2, 3, 2}, /* cost of storing integer registers */
302 4, /* cost of reg,reg fld/fst */
303 {6, 6, 6}, /* cost of loading fp registers
304 in SFmode, DFmode and XFmode */
305 {4, 4, 4}, /* cost of loading integer registers */
306 2, /* cost of moving MMX register */
307 {2, 2}, /* cost of loading MMX registers
308 in SImode and DImode */
309 {2, 2}, /* cost of storing MMX registers
310 in SImode and DImode */
311 2, /* cost of moving SSE register */
312 {2, 2, 8}, /* cost of loading SSE registers
313 in SImode, DImode and TImode */
314 {2, 2, 8}, /* cost of storing SSE registers
315 in SImode, DImode and TImode */
316 6, /* MMX or SSE register to integer */
317 32, /* size of prefetch block */
318 1, /* number of parallel prefetches */
320 2, /* cost of FADD and FSUB insns. */
321 2, /* cost of FMUL instruction. */
322 56, /* cost of FDIV instruction. */
323 2, /* cost of FABS instruction. */
324 2, /* cost of FCHS instruction. */
325 56, /* cost of FSQRT instruction. */
329 struct processor_costs athlon_cost = {
330 1, /* cost of an add instruction */
331 2, /* cost of a lea instruction */
332 1, /* variable shift costs */
333 1, /* constant shift costs */
334 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
335 0, /* cost of multiply per each bit set */
336 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
337 1, /* cost of movsx */
338 1, /* cost of movzx */
339 8, /* "large" insn */
341 4, /* cost for loading QImode using movzbl */
342 {3, 4, 3}, /* cost of loading integer registers
343 in QImode, HImode and SImode.
344 Relative to reg-reg move (2). */
345 {3, 4, 3}, /* cost of storing integer registers */
346 4, /* cost of reg,reg fld/fst */
347 {4, 4, 12}, /* cost of loading fp registers
348 in SFmode, DFmode and XFmode */
349 {6, 6, 8}, /* cost of loading integer registers */
350 2, /* cost of moving MMX register */
351 {4, 4}, /* cost of loading MMX registers
352 in SImode and DImode */
353 {4, 4}, /* cost of storing MMX registers
354 in SImode and DImode */
355 2, /* cost of moving SSE register */
356 {4, 4, 6}, /* cost of loading SSE registers
357 in SImode, DImode and TImode */
358 {4, 4, 5}, /* cost of storing SSE registers
359 in SImode, DImode and TImode */
360 5, /* MMX or SSE register to integer */
361 64, /* size of prefetch block */
362 6, /* number of parallel prefetches */
364 4, /* cost of FADD and FSUB insns. */
365 4, /* cost of FMUL instruction. */
366 24, /* cost of FDIV instruction. */
367 2, /* cost of FABS instruction. */
368 2, /* cost of FCHS instruction. */
369 35, /* cost of FSQRT instruction. */
373 struct processor_costs k8_cost = {
374 1, /* cost of an add instruction */
375 2, /* cost of a lea instruction */
376 1, /* variable shift costs */
377 1, /* constant shift costs */
378 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
379 0, /* cost of multiply per each bit set */
380 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
381 1, /* cost of movsx */
382 1, /* cost of movzx */
383 8, /* "large" insn */
385 4, /* cost for loading QImode using movzbl */
386 {3, 4, 3}, /* cost of loading integer registers
387 in QImode, HImode and SImode.
388 Relative to reg-reg move (2). */
389 {3, 4, 3}, /* cost of storing integer registers */
390 4, /* cost of reg,reg fld/fst */
391 {4, 4, 12}, /* cost of loading fp registers
392 in SFmode, DFmode and XFmode */
393 {6, 6, 8}, /* cost of loading integer registers */
394 2, /* cost of moving MMX register */
395 {3, 3}, /* cost of loading MMX registers
396 in SImode and DImode */
397 {4, 4}, /* cost of storing MMX registers
398 in SImode and DImode */
399 2, /* cost of moving SSE register */
400 {4, 3, 6}, /* cost of loading SSE registers
401 in SImode, DImode and TImode */
402 {4, 4, 5}, /* cost of storing SSE registers
403 in SImode, DImode and TImode */
404 5, /* MMX or SSE register to integer */
405 64, /* size of prefetch block */
406 6, /* number of parallel prefetches */
408 4, /* cost of FADD and FSUB insns. */
409 4, /* cost of FMUL instruction. */
410 19, /* cost of FDIV instruction. */
411 2, /* cost of FABS instruction. */
412 2, /* cost of FCHS instruction. */
413 35, /* cost of FSQRT instruction. */
417 struct processor_costs pentium4_cost = {
418 1, /* cost of an add instruction */
419 1, /* cost of a lea instruction */
420 4, /* variable shift costs */
421 4, /* constant shift costs */
422 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
423 0, /* cost of multiply per each bit set */
424 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
425 1, /* cost of movsx */
426 1, /* cost of movzx */
427 16, /* "large" insn */
429 2, /* cost for loading QImode using movzbl */
430 {4, 5, 4}, /* cost of loading integer registers
431 in QImode, HImode and SImode.
432 Relative to reg-reg move (2). */
433 {2, 3, 2}, /* cost of storing integer registers */
434 2, /* cost of reg,reg fld/fst */
435 {2, 2, 6}, /* cost of loading fp registers
436 in SFmode, DFmode and XFmode */
437 {4, 4, 6}, /* cost of loading integer registers */
438 2, /* cost of moving MMX register */
439 {2, 2}, /* cost of loading MMX registers
440 in SImode and DImode */
441 {2, 2}, /* cost of storing MMX registers
442 in SImode and DImode */
443 12, /* cost of moving SSE register */
444 {12, 12, 12}, /* cost of loading SSE registers
445 in SImode, DImode and TImode */
446 {2, 2, 8}, /* cost of storing SSE registers
447 in SImode, DImode and TImode */
448 10, /* MMX or SSE register to integer */
449 64, /* size of prefetch block */
450 6, /* number of parallel prefetches */
452 5, /* cost of FADD and FSUB insns. */
453 7, /* cost of FMUL instruction. */
454 43, /* cost of FDIV instruction. */
455 2, /* cost of FABS instruction. */
456 2, /* cost of FCHS instruction. */
457 43, /* cost of FSQRT instruction. */
460 const struct processor_costs *ix86_cost = &pentium_cost;
462 /* Processor feature/optimization bitmasks. */
463 #define m_386 (1<<PROCESSOR_I386)
464 #define m_486 (1<<PROCESSOR_I486)
465 #define m_PENT (1<<PROCESSOR_PENTIUM)
466 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
467 #define m_K6 (1<<PROCESSOR_K6)
468 #define m_ATHLON (1<<PROCESSOR_ATHLON)
469 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
470 #define m_K8 (1<<PROCESSOR_K8)
471 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
473 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
474 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
475 const int x86_zero_extend_with_and = m_486 | m_PENT;
476 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
477 const int x86_double_with_add = ~m_386;
478 const int x86_use_bit_test = m_386;
479 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
480 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
481 const int x86_3dnow_a = m_ATHLON_K8;
482 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
483 const int x86_branch_hints = m_PENT4;
484 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
485 const int x86_partial_reg_stall = m_PPRO;
486 const int x86_use_loop = m_K6;
487 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
488 const int x86_use_mov0 = m_K6;
489 const int x86_use_cltd = ~(m_PENT | m_K6);
490 const int x86_read_modify_write = ~m_PENT;
491 const int x86_read_modify = ~(m_PENT | m_PPRO);
492 const int x86_split_long_moves = m_PPRO;
493 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
494 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
495 const int x86_single_stringop = m_386 | m_PENT4;
496 const int x86_qimode_math = ~(0);
497 const int x86_promote_qi_regs = 0;
498 const int x86_himode_math = ~(m_PPRO);
499 const int x86_promote_hi_regs = m_PPRO;
500 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
501 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
502 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
503 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
504 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
505 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
506 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
507 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
508 const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
509 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
510 const int x86_decompose_lea = m_PENT4;
511 const int x86_shift1 = ~m_486;
512 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
513 const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
514 /* Set for machines where the type and dependencies are resolved on SSE register
515 parts instead of whole registers, so we may maintain just lower part of
516 scalar values in proper format leaving the upper part undefined. */
517 const int x86_sse_partial_regs = m_ATHLON_K8;
518 /* Athlon optimizes partial-register FPS special case, thus avoiding the
519 need for extra instructions beforehand */
520 const int x86_sse_partial_regs_for_cvtsd2ss = 0;
521 const int x86_sse_typeless_stores = m_ATHLON_K8;
522 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
523 const int x86_use_ffreep = m_ATHLON_K8;
524 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
525 const int x86_inter_unit_moves = ~(m_ATHLON_K8);
526 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_PPRO;
528 /* In case the average insn count for single function invocation is
529 lower than this constant, emit fast (but longer) prologue and
531 #define FAST_PROLOGUE_INSN_COUNT 20
533 /* Set by prologue expander and used by epilogue expander to determine
535 static int use_fast_prologue_epilogue;
537 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
538 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
539 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
540 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
542 /* Array of the smallest class containing reg number REGNO, indexed by
543 REGNO. Used by REGNO_REG_CLASS in i386.h. */
545 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
548 AREG, DREG, CREG, BREG,
550 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
552 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
553 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
556 /* flags, fpsr, dirflag, frame */
557 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
558 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
560 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
562 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
563 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
564 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
568 /* The "default" register map used in 32bit mode. */
570 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
572 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
573 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
574 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
575 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
576 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
577 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
578 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
581 static int const x86_64_int_parameter_registers[6] =
583 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
584 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
587 static int const x86_64_int_return_registers[4] =
589 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
592 /* The "default" register map used in 64bit mode. */
593 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
595 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
596 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
597 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
598 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
599 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
600 8,9,10,11,12,13,14,15, /* extended integer registers */
601 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
604 /* Define the register numbers to be used in Dwarf debugging information.
605 The SVR4 reference port C compiler uses the following register numbers
606 in its Dwarf output code:
607 0 for %eax (gcc regno = 0)
608 1 for %ecx (gcc regno = 2)
609 2 for %edx (gcc regno = 1)
610 3 for %ebx (gcc regno = 3)
611 4 for %esp (gcc regno = 7)
612 5 for %ebp (gcc regno = 6)
613 6 for %esi (gcc regno = 4)
614 7 for %edi (gcc regno = 5)
615 The following three DWARF register numbers are never generated by
616 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
617 believes these numbers have these meanings.
618 8 for %eip (no gcc equivalent)
619 9 for %eflags (gcc regno = 17)
620 10 for %trapno (no gcc equivalent)
621 It is not at all clear how we should number the FP stack registers
622 for the x86 architecture. If the version of SDB on x86/svr4 were
623 a bit less brain dead with respect to floating-point then we would
624 have a precedent to follow with respect to DWARF register numbers
625 for x86 FP registers, but the SDB on x86/svr4 is so completely
626 broken with respect to FP registers that it is hardly worth thinking
627 of it as something to strive for compatibility with.
628 The version of x86/svr4 SDB I have at the moment does (partially)
629 seem to believe that DWARF register number 11 is associated with
630 the x86 register %st(0), but that's about all. Higher DWARF
631 register numbers don't seem to be associated with anything in
632 particular, and even for DWARF regno 11, SDB only seems to under-
633 stand that it should say that a variable lives in %st(0) (when
634 asked via an `=' command) if we said it was in DWARF regno 11,
635 but SDB still prints garbage when asked for the value of the
636 variable in question (via a `/' command).
637 (Also note that the labels SDB prints for various FP stack regs
638 when doing an `x' command are all wrong.)
639 Note that these problems generally don't affect the native SVR4
640 C compiler because it doesn't allow the use of -O with -g and
641 because when it is *not* optimizing, it allocates a memory
642 location for each floating-point variable, and the memory
643 location is what gets described in the DWARF AT_location
644 attribute for the variable in question.
645 Regardless of the severe mental illness of the x86/svr4 SDB, we
646 do something sensible here and we use the following DWARF
647 register numbers. Note that these are all stack-top-relative
649 11 for %st(0) (gcc regno = 8)
650 12 for %st(1) (gcc regno = 9)
651 13 for %st(2) (gcc regno = 10)
652 14 for %st(3) (gcc regno = 11)
653 15 for %st(4) (gcc regno = 12)
654 16 for %st(5) (gcc regno = 13)
655 17 for %st(6) (gcc regno = 14)
656 18 for %st(7) (gcc regno = 15)
658 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
660 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
661 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
662 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
663 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
664 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
665 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
666 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
669 /* Test and compare insns in i386.md store the information needed to
670 generate branch and scc insns here. */
672 rtx ix86_compare_op0 = NULL_RTX;
673 rtx ix86_compare_op1 = NULL_RTX;
675 /* The encoding characters for the four TLS models present in ELF. */
677 static char const tls_model_chars[] = " GLil";
679 #define MAX_386_STACK_LOCALS 3
680 /* Size of the register save area. */
681 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
683 /* Define the structure for the machine field in struct function. */
685 struct stack_local_entry GTY(())
690 struct stack_local_entry *next;
693 /* Structure describing stack frame layout.
694 Stack grows downward:
700 saved frame pointer if frame_pointer_needed
701 <- HARD_FRAME_POINTER
707 > to_allocate <- FRAME_POINTER
719 int outgoing_arguments_size;
722 HOST_WIDE_INT to_allocate;
723 /* The offsets relative to ARG_POINTER. */
724 HOST_WIDE_INT frame_pointer_offset;
725 HOST_WIDE_INT hard_frame_pointer_offset;
726 HOST_WIDE_INT stack_pointer_offset;
729 /* Used to enable/disable debugging features. */
730 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
731 /* Code model option as passed by user. */
732 const char *ix86_cmodel_string;
734 enum cmodel ix86_cmodel;
736 const char *ix86_asm_string;
737 enum asm_dialect ix86_asm_dialect = ASM_ATT;
739 const char *ix86_tls_dialect_string;
740 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
742 /* Which unit we are generating floating point math for. */
743 enum fpmath_unit ix86_fpmath;
745 /* Which cpu are we scheduling for. */
746 enum processor_type ix86_tune;
747 /* Which instruction set architecture to use. */
748 enum processor_type ix86_arch;
750 /* Strings to hold which cpu and instruction set architecture to use. */
751 const char *ix86_tune_string; /* for -mtune=<xxx> */
752 const char *ix86_arch_string; /* for -march=<xxx> */
753 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
755 /* # of registers to use to pass arguments. */
756 const char *ix86_regparm_string;
758 /* true if sse prefetch instruction is not NOOP. */
759 int x86_prefetch_sse;
761 /* ix86_regparm_string as a number */
764 /* Alignment to use for loops and jumps: */
766 /* Power of two alignment for loops. */
767 const char *ix86_align_loops_string;
769 /* Power of two alignment for non-loop jumps. */
770 const char *ix86_align_jumps_string;
772 /* Power of two alignment for stack boundary in bytes. */
773 const char *ix86_preferred_stack_boundary_string;
775 /* Preferred alignment for stack boundary in bits. */
776 int ix86_preferred_stack_boundary;
778 /* Values 1-5: see jump.c */
779 int ix86_branch_cost;
780 const char *ix86_branch_cost_string;
782 /* Power of two alignment for functions. */
783 const char *ix86_align_funcs_string;
785 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
786 static char internal_label_prefix[16];
787 static int internal_label_prefix_len;
789 static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
790 static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
791 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
792 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
794 static const char *get_some_local_dynamic_name PARAMS ((void));
795 static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
796 static rtx maybe_get_pool_constant PARAMS ((rtx));
797 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
798 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
800 static rtx get_thread_pointer PARAMS ((void));
801 static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
802 static rtx gen_push PARAMS ((rtx));
803 static int memory_address_length PARAMS ((rtx addr));
804 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
805 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
806 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
807 static void ix86_dump_ppro_packet PARAMS ((FILE *));
808 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
809 static struct machine_function * ix86_init_machine_status PARAMS ((void));
810 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
811 static int ix86_nsaved_regs PARAMS ((void));
812 static void ix86_emit_save_regs PARAMS ((void));
813 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
814 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
815 static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
816 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
817 static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
818 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
819 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
820 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
821 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
822 static int ix86_issue_rate PARAMS ((void));
823 static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
824 static void ix86_sched_init PARAMS ((FILE *, int, int));
825 static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
826 static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
827 static int ia32_use_dfa_pipeline_interface PARAMS ((void));
828 static int ia32_multipass_dfa_lookahead PARAMS ((void));
829 static void ix86_init_mmx_sse_builtins PARAMS ((void));
830 static rtx x86_this_parameter PARAMS ((tree));
831 static void x86_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
832 HOST_WIDE_INT, tree));
833 static bool x86_can_output_mi_thunk PARAMS ((tree, HOST_WIDE_INT,
834 HOST_WIDE_INT, tree));
835 bool ix86_expand_carry_flag_compare PARAMS ((enum rtx_code, rtx, rtx, rtx*));
839 rtx base, index, disp;
843 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
844 static int ix86_address_cost PARAMS ((rtx));
845 static bool ix86_cannot_force_const_mem PARAMS ((rtx));
846 static rtx ix86_delegitimize_address PARAMS ((rtx));
848 static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
849 static const char *ix86_strip_name_encoding PARAMS ((const char *))
852 struct builtin_description;
853 static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
855 static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
857 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
858 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
859 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
860 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
861 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
862 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
863 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
867 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
869 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
870 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
871 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
872 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
873 static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
874 static int ix86_save_reg PARAMS ((unsigned int, int));
875 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
876 static int ix86_comp_type_attributes PARAMS ((tree, tree));
877 static int ix86_fntype_regparm PARAMS ((tree));
878 const struct attribute_spec ix86_attribute_table[];
879 static bool ix86_function_ok_for_sibcall PARAMS ((tree, tree));
880 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
881 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
882 static int ix86_value_regno PARAMS ((enum machine_mode));
883 static bool contains_128bit_aligned_vector_p PARAMS ((tree));
884 static bool ix86_ms_bitfield_layout_p PARAMS ((tree));
885 static tree ix86_handle_struct_attribute PARAMS ((tree *, tree, tree, int, bool *));
886 static int extended_reg_mentioned_1 PARAMS ((rtx *, void *));
887 static bool ix86_rtx_costs PARAMS ((rtx, int, int, int *));
889 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
890 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
893 /* Register class used for passing given 64bit part of the argument.
894 These represent classes as documented by the PS ABI, with the exception
895 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
896 use SF or DFmode move instead of DImode to avoid reformatting penalties.
898 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
899 whenever possible (upper half does contain padding).
901 enum x86_64_reg_class
904 X86_64_INTEGER_CLASS,
905 X86_64_INTEGERSI_CLASS,
914 static const char * const x86_64_reg_class_name[] =
915 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
917 #define MAX_CLASSES 4
918 static int classify_argument PARAMS ((enum machine_mode, tree,
919 enum x86_64_reg_class [MAX_CLASSES],
921 static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
923 static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
925 static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
926 enum x86_64_reg_class));
928 /* Table of constants used by fldpi, fldln2, etc... */
929 static REAL_VALUE_TYPE ext_80387_constants_table [5];
930 static bool ext_80387_constants_init = 0;
931 static void init_ext_80387_constants PARAMS ((void));
933 /* Initialize the GCC target structure. */
934 #undef TARGET_ATTRIBUTE_TABLE
935 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
936 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
937 # undef TARGET_MERGE_DECL_ATTRIBUTES
938 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
941 #undef TARGET_COMP_TYPE_ATTRIBUTES
942 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
944 #undef TARGET_INIT_BUILTINS
945 #define TARGET_INIT_BUILTINS ix86_init_builtins
947 #undef TARGET_EXPAND_BUILTIN
948 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
950 #undef TARGET_ASM_FUNCTION_EPILOGUE
951 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
953 #undef TARGET_ASM_OPEN_PAREN
954 #define TARGET_ASM_OPEN_PAREN ""
955 #undef TARGET_ASM_CLOSE_PAREN
956 #define TARGET_ASM_CLOSE_PAREN ""
958 #undef TARGET_ASM_ALIGNED_HI_OP
959 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
960 #undef TARGET_ASM_ALIGNED_SI_OP
961 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
963 #undef TARGET_ASM_ALIGNED_DI_OP
964 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
967 #undef TARGET_ASM_UNALIGNED_HI_OP
968 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
969 #undef TARGET_ASM_UNALIGNED_SI_OP
970 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
971 #undef TARGET_ASM_UNALIGNED_DI_OP
972 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
974 #undef TARGET_SCHED_ADJUST_COST
975 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
976 #undef TARGET_SCHED_ISSUE_RATE
977 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
978 #undef TARGET_SCHED_VARIABLE_ISSUE
979 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
980 #undef TARGET_SCHED_INIT
981 #define TARGET_SCHED_INIT ix86_sched_init
982 #undef TARGET_SCHED_REORDER
983 #define TARGET_SCHED_REORDER ix86_sched_reorder
984 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
985 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
986 ia32_use_dfa_pipeline_interface
987 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
988 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
989 ia32_multipass_dfa_lookahead
991 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
992 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
995 #undef TARGET_HAVE_TLS
996 #define TARGET_HAVE_TLS true
998 #undef TARGET_CANNOT_FORCE_CONST_MEM
999 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1001 #undef TARGET_DELEGITIMIZE_ADDRESS
1002 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1004 #undef TARGET_MS_BITFIELD_LAYOUT_P
1005 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1007 #undef TARGET_ASM_OUTPUT_MI_THUNK
1008 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1009 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1010 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1012 #undef TARGET_RTX_COSTS
1013 #define TARGET_RTX_COSTS ix86_rtx_costs
1014 #undef TARGET_ADDRESS_COST
1015 #define TARGET_ADDRESS_COST ix86_address_cost
1017 struct gcc_target targetm = TARGET_INITIALIZER;
1019 /* The svr4 ABI for the i386 says that records and unions are returned
1021 #ifndef DEFAULT_PCC_STRUCT_RETURN
1022 #define DEFAULT_PCC_STRUCT_RETURN 1
1025 /* Sometimes certain combinations of command options do not make
1026 sense on a particular target machine. You can define a macro
1027 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1028 defined, is executed once just after all the command options have
1031 Don't use this macro to turn on various extra optimizations for
1032 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1038 /* Comes from final.c -- no real reason to change it. */
1039 #define MAX_CODE_ALIGN 16
1043 const struct processor_costs *cost; /* Processor costs */
1044 const int target_enable; /* Target flags to enable. */
1045 const int target_disable; /* Target flags to disable. */
1046 const int align_loop; /* Default alignments. */
1047 const int align_loop_max_skip;
1048 const int align_jump;
1049 const int align_jump_max_skip;
1050 const int align_func;
1052 const processor_target_table[PROCESSOR_max] =
1054 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1055 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1056 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1057 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1058 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1059 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1060 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1061 {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
1064 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1067 const char *const name; /* processor name or nickname. */
1068 const enum processor_type processor;
1069 const enum pta_flags
1074 PTA_PREFETCH_SSE = 8,
1080 const processor_alias_table[] =
1082 {"i386", PROCESSOR_I386, 0},
1083 {"i486", PROCESSOR_I486, 0},
1084 {"i586", PROCESSOR_PENTIUM, 0},
1085 {"pentium", PROCESSOR_PENTIUM, 0},
1086 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1087 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1088 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1089 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1090 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1091 {"i686", PROCESSOR_PENTIUMPRO, 0},
1092 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1093 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1094 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1095 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
1096 PTA_MMX | PTA_PREFETCH_SSE},
1097 {"k6", PROCESSOR_K6, PTA_MMX},
1098 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1099 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1100 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1102 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1103 | PTA_3DNOW | PTA_3DNOW_A},
1104 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1105 | PTA_3DNOW_A | PTA_SSE},
1106 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1107 | PTA_3DNOW_A | PTA_SSE},
1108 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1109 | PTA_3DNOW_A | PTA_SSE},
1110 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1111 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1114 int const pta_size = ARRAY_SIZE (processor_alias_table);
1116 /* By default our XFmode is the 80-bit extended format. If we have
1117 use TFmode instead, it's also the 80-bit format, but with padding. */
1118 real_format_for_mode[XFmode - QFmode] = &ieee_extended_intel_96_format;
1119 real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
1121 /* Set the default values for switches whose default depends on TARGET_64BIT
1122 in case they weren't overwritten by command line options. */
1125 if (flag_omit_frame_pointer == 2)
1126 flag_omit_frame_pointer = 1;
1127 if (flag_asynchronous_unwind_tables == 2)
1128 flag_asynchronous_unwind_tables = 1;
1129 if (flag_pcc_struct_return == 2)
1130 flag_pcc_struct_return = 0;
1134 if (flag_omit_frame_pointer == 2)
1135 flag_omit_frame_pointer = 0;
1136 if (flag_asynchronous_unwind_tables == 2)
1137 flag_asynchronous_unwind_tables = 0;
1138 if (flag_pcc_struct_return == 2)
1139 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1142 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1143 SUBTARGET_OVERRIDE_OPTIONS;
1146 if (!ix86_tune_string && ix86_arch_string)
1147 ix86_tune_string = ix86_arch_string;
1148 if (!ix86_tune_string)
1149 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1150 if (!ix86_arch_string)
1151 ix86_arch_string = TARGET_64BIT ? "k8" : "i386";
1153 if (ix86_cmodel_string != 0)
1155 if (!strcmp (ix86_cmodel_string, "small"))
1156 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1158 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1159 else if (!strcmp (ix86_cmodel_string, "32"))
1160 ix86_cmodel = CM_32;
1161 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1162 ix86_cmodel = CM_KERNEL;
1163 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1164 ix86_cmodel = CM_MEDIUM;
1165 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1166 ix86_cmodel = CM_LARGE;
1168 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1172 ix86_cmodel = CM_32;
1174 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1176 if (ix86_asm_string != 0)
1178 if (!strcmp (ix86_asm_string, "intel"))
1179 ix86_asm_dialect = ASM_INTEL;
1180 else if (!strcmp (ix86_asm_string, "att"))
1181 ix86_asm_dialect = ASM_ATT;
1183 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1185 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1186 error ("code model `%s' not supported in the %s bit mode",
1187 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1188 if (ix86_cmodel == CM_LARGE)
1189 sorry ("code model `large' not supported yet");
1190 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1191 sorry ("%i-bit mode not compiled in",
1192 (target_flags & MASK_64BIT) ? 64 : 32);
1194 for (i = 0; i < pta_size; i++)
1195 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1197 ix86_arch = processor_alias_table[i].processor;
1198 /* Default cpu tuning to the architecture. */
1199 ix86_tune = ix86_arch;
1200 if (processor_alias_table[i].flags & PTA_MMX
1201 && !(target_flags_explicit & MASK_MMX))
1202 target_flags |= MASK_MMX;
1203 if (processor_alias_table[i].flags & PTA_3DNOW
1204 && !(target_flags_explicit & MASK_3DNOW))
1205 target_flags |= MASK_3DNOW;
1206 if (processor_alias_table[i].flags & PTA_3DNOW_A
1207 && !(target_flags_explicit & MASK_3DNOW_A))
1208 target_flags |= MASK_3DNOW_A;
1209 if (processor_alias_table[i].flags & PTA_SSE
1210 && !(target_flags_explicit & MASK_SSE))
1211 target_flags |= MASK_SSE;
1212 if (processor_alias_table[i].flags & PTA_SSE2
1213 && !(target_flags_explicit & MASK_SSE2))
1214 target_flags |= MASK_SSE2;
1215 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1216 x86_prefetch_sse = true;
1217 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1218 error ("CPU you selected does not support x86-64 instruction set");
1223 error ("bad value (%s) for -march= switch", ix86_arch_string);
1225 for (i = 0; i < pta_size; i++)
1226 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1228 ix86_tune = processor_alias_table[i].processor;
1229 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1230 error ("CPU you selected does not support x86-64 instruction set");
1233 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1234 x86_prefetch_sse = true;
1236 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1239 ix86_cost = &size_cost;
1241 ix86_cost = processor_target_table[ix86_tune].cost;
1242 target_flags |= processor_target_table[ix86_tune].target_enable;
1243 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1245 /* Arrange to set up i386_stack_locals for all functions. */
1246 init_machine_status = ix86_init_machine_status;
1248 /* Validate -mregparm= value. */
1249 if (ix86_regparm_string)
1251 i = atoi (ix86_regparm_string);
1252 if (i < 0 || i > REGPARM_MAX)
1253 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1259 ix86_regparm = REGPARM_MAX;
1261 /* If the user has provided any of the -malign-* options,
1262 warn and use that value only if -falign-* is not set.
1263 Remove this code in GCC 3.2 or later. */
1264 if (ix86_align_loops_string)
1266 warning ("-malign-loops is obsolete, use -falign-loops");
1267 if (align_loops == 0)
1269 i = atoi (ix86_align_loops_string);
1270 if (i < 0 || i > MAX_CODE_ALIGN)
1271 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1273 align_loops = 1 << i;
1277 if (ix86_align_jumps_string)
1279 warning ("-malign-jumps is obsolete, use -falign-jumps");
1280 if (align_jumps == 0)
1282 i = atoi (ix86_align_jumps_string);
1283 if (i < 0 || i > MAX_CODE_ALIGN)
1284 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1286 align_jumps = 1 << i;
1290 if (ix86_align_funcs_string)
1292 warning ("-malign-functions is obsolete, use -falign-functions");
1293 if (align_functions == 0)
1295 i = atoi (ix86_align_funcs_string);
1296 if (i < 0 || i > MAX_CODE_ALIGN)
1297 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1299 align_functions = 1 << i;
1303 /* Default align_* from the processor table. */
1304 if (align_loops == 0)
1306 align_loops = processor_target_table[ix86_tune].align_loop;
1307 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1309 if (align_jumps == 0)
1311 align_jumps = processor_target_table[ix86_tune].align_jump;
1312 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1314 if (align_functions == 0)
1316 align_functions = processor_target_table[ix86_tune].align_func;
1319 /* Validate -mpreferred-stack-boundary= value, or provide default.
1320 The default of 128 bits is for Pentium III's SSE __m128, but we
1321 don't want additional code to keep the stack aligned when
1322 optimizing for code size. */
1323 ix86_preferred_stack_boundary = (optimize_size
1324 ? TARGET_64BIT ? 128 : 32
1326 if (ix86_preferred_stack_boundary_string)
1328 i = atoi (ix86_preferred_stack_boundary_string);
1329 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1330 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1331 TARGET_64BIT ? 4 : 2);
1333 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1336 /* Validate -mbranch-cost= value, or provide default. */
1337 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1338 if (ix86_branch_cost_string)
1340 i = atoi (ix86_branch_cost_string);
1342 error ("-mbranch-cost=%d is not between 0 and 5", i);
1344 ix86_branch_cost = i;
1347 if (ix86_tls_dialect_string)
1349 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1350 ix86_tls_dialect = TLS_DIALECT_GNU;
1351 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1352 ix86_tls_dialect = TLS_DIALECT_SUN;
1354 error ("bad value (%s) for -mtls-dialect= switch",
1355 ix86_tls_dialect_string);
1358 /* Keep nonleaf frame pointers. */
1359 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1360 flag_omit_frame_pointer = 1;
1362 /* If we're doing fast math, we don't care about comparison order
1363 wrt NaNs. This lets us use a shorter comparison sequence. */
1364 if (flag_unsafe_math_optimizations)
1365 target_flags &= ~MASK_IEEE_FP;
1367 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1368 since the insns won't need emulation. */
1369 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1370 target_flags &= ~MASK_NO_FANCY_MATH_387;
1374 if (TARGET_ALIGN_DOUBLE)
1375 error ("-malign-double makes no sense in the 64bit mode");
1377 error ("-mrtd calling convention not supported in the 64bit mode");
1378 /* Enable by default the SSE and MMX builtins. */
1379 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1380 ix86_fpmath = FPMATH_SSE;
1383 ix86_fpmath = FPMATH_387;
1385 if (ix86_fpmath_string != 0)
1387 if (! strcmp (ix86_fpmath_string, "387"))
1388 ix86_fpmath = FPMATH_387;
1389 else if (! strcmp (ix86_fpmath_string, "sse"))
1393 warning ("SSE instruction set disabled, using 387 arithmetics");
1394 ix86_fpmath = FPMATH_387;
1397 ix86_fpmath = FPMATH_SSE;
1399 else if (! strcmp (ix86_fpmath_string, "387,sse")
1400 || ! strcmp (ix86_fpmath_string, "sse,387"))
1404 warning ("SSE instruction set disabled, using 387 arithmetics");
1405 ix86_fpmath = FPMATH_387;
1407 else if (!TARGET_80387)
1409 warning ("387 instruction set disabled, using SSE arithmetics");
1410 ix86_fpmath = FPMATH_SSE;
1413 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1416 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1419 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1423 target_flags |= MASK_MMX;
1424 x86_prefetch_sse = true;
1427 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1430 target_flags |= MASK_MMX;
1431 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1432 extensions it adds. */
1433 if (x86_3dnow_a & (1 << ix86_arch))
1434 target_flags |= MASK_3DNOW_A;
1436 if ((x86_accumulate_outgoing_args & TUNEMASK)
1437 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1439 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1441 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1444 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1445 p = strchr (internal_label_prefix, 'X');
1446 internal_label_prefix_len = p - internal_label_prefix;
1452 optimization_options (level, size)
1454 int size ATTRIBUTE_UNUSED;
1456 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1457 make the problem with not enough registers even worse. */
1458 #ifdef INSN_SCHEDULING
1460 flag_schedule_insns = 0;
1463 /* The default values of these switches depend on the TARGET_64BIT
1464 that is not known at this moment. Mark these values with 2 and
1465 let user the to override these. In case there is no command line option
1466 specifying them, we will set the defaults in override_options. */
1468 flag_omit_frame_pointer = 2;
1469 flag_pcc_struct_return = 2;
1470 flag_asynchronous_unwind_tables = 2;
1473 /* Table of valid machine attributes. */
1474 const struct attribute_spec ix86_attribute_table[] =
1476 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1477 /* Stdcall attribute says callee is responsible for popping arguments
1478 if they are not variable. */
1479 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1480 /* Fastcall attribute says callee is responsible for popping arguments
1481 if they are not variable. */
1482 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1483 /* Cdecl attribute says the callee is a normal C declaration */
1484 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1485 /* Regparm attribute specifies how many integer arguments are to be
1486 passed in registers. */
1487 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1488 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1489 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1490 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1491 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1493 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1494 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1495 { NULL, 0, 0, false, false, false, NULL }
1498 /* Decide whether we can make a sibling call to a function. DECL is the
1499 declaration of the function being targeted by the call and EXP is the
1500 CALL_EXPR representing the call. */
1503 ix86_function_ok_for_sibcall (decl, exp)
1507 /* If we are generating position-independent code, we cannot sibcall
1508 optimize any indirect call, or a direct call to a global function,
1509 as the PLT requires %ebx be live. */
1510 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1513 /* If we are returning floats on the 80387 register stack, we cannot
1514 make a sibcall from a function that doesn't return a float to a
1515 function that does or, conversely, from a function that does return
1516 a float to a function that doesn't; the necessary stack adjustment
1517 would not be executed. */
1518 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1519 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1522 /* If this call is indirect, we'll need to be able to use a call-clobbered
1523 register for the address of the target function. Make sure that all
1524 such registers are not used for passing parameters. */
1525 if (!decl && !TARGET_64BIT)
1527 int regparm = ix86_regparm;
1530 /* We're looking at the CALL_EXPR, we need the type of the function. */
1531 type = TREE_OPERAND (exp, 0); /* pointer expression */
1532 type = TREE_TYPE (type); /* pointer type */
1533 type = TREE_TYPE (type); /* function type */
1535 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1537 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1541 /* ??? Need to count the actual number of registers to be used,
1542 not the possible number of registers. Fix later. */
1547 /* Otherwise okay. That also includes certain types of indirect calls. */
1551 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1552 arguments as in struct attribute_spec.handler. */
1554 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1557 tree args ATTRIBUTE_UNUSED;
1558 int flags ATTRIBUTE_UNUSED;
1561 if (TREE_CODE (*node) != FUNCTION_TYPE
1562 && TREE_CODE (*node) != METHOD_TYPE
1563 && TREE_CODE (*node) != FIELD_DECL
1564 && TREE_CODE (*node) != TYPE_DECL)
1566 warning ("`%s' attribute only applies to functions",
1567 IDENTIFIER_POINTER (name));
1568 *no_add_attrs = true;
1572 if (is_attribute_p ("fastcall", name))
1574 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1576 error ("fastcall and stdcall attributes are not compatible");
1578 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1580 error ("fastcall and regparm attributes are not compatible");
1583 else if (is_attribute_p ("stdcall", name))
1585 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1587 error ("fastcall and stdcall attributes are not compatible");
1594 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1595 *no_add_attrs = true;
1601 /* Handle a "regparm" attribute;
1602 arguments as in struct attribute_spec.handler. */
1604 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1608 int flags ATTRIBUTE_UNUSED;
1611 if (TREE_CODE (*node) != FUNCTION_TYPE
1612 && TREE_CODE (*node) != METHOD_TYPE
1613 && TREE_CODE (*node) != FIELD_DECL
1614 && TREE_CODE (*node) != TYPE_DECL)
1616 warning ("`%s' attribute only applies to functions",
1617 IDENTIFIER_POINTER (name));
1618 *no_add_attrs = true;
1624 cst = TREE_VALUE (args);
1625 if (TREE_CODE (cst) != INTEGER_CST)
1627 warning ("`%s' attribute requires an integer constant argument",
1628 IDENTIFIER_POINTER (name));
1629 *no_add_attrs = true;
1631 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1633 warning ("argument to `%s' attribute larger than %d",
1634 IDENTIFIER_POINTER (name), REGPARM_MAX);
1635 *no_add_attrs = true;
1638 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1640 error ("fastcall and regparm attributes are not compatible");
1647 /* Return 0 if the attributes for two types are incompatible, 1 if they
1648 are compatible, and 2 if they are nearly compatible (which causes a
1649 warning to be generated). */
1652 ix86_comp_type_attributes (type1, type2)
1656 /* Check for mismatch of non-default calling convention. */
1657 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1659 if (TREE_CODE (type1) != FUNCTION_TYPE)
1662 /* Check for mismatched fastcall types */
1663 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1664 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1667 /* Check for mismatched return types (cdecl vs stdcall). */
1668 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1669 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1674 /* Return the regparm value for a fuctio with the indicated TYPE. */
1677 ix86_fntype_regparm (type)
1682 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1684 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1686 return ix86_regparm;
1689 /* Value is the number of bytes of arguments automatically
1690 popped when returning from a subroutine call.
1691 FUNDECL is the declaration node of the function (as a tree),
1692 FUNTYPE is the data type of the function (as a tree),
1693 or for a library call it is an identifier node for the subroutine name.
1694 SIZE is the number of bytes of arguments passed on the stack.
1696 On the 80386, the RTD insn may be used to pop them if the number
1697 of args is fixed, but if the number is variable then the caller
1698 must pop them all. RTD can't be used for library calls now
1699 because the library is compiled with the Unix compiler.
1700 Use of RTD is a selectable option, since it is incompatible with
1701 standard Unix calling sequences. If the option is not selected,
1702 the caller must always pop the args.
1704 The attribute stdcall is equivalent to RTD on a per module basis. */
1707 ix86_return_pops_args (fundecl, funtype, size)
1712 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1714 /* Cdecl functions override -mrtd, and never pop the stack. */
1715 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1717 /* Stdcall and fastcall functions will pop the stack if not variable args. */
1718 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1719 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1723 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1724 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1725 == void_type_node)))
1729 /* Lose any fake structure return argument if it is passed on the stack. */
1730 if (aggregate_value_p (TREE_TYPE (funtype))
1733 int nregs = ix86_fntype_regparm (funtype);
1736 return GET_MODE_SIZE (Pmode);
1742 /* Argument support functions. */
1744 /* Return true when register may be used to pass function parameters. */
1746 ix86_function_arg_regno_p (regno)
1751 return (regno < REGPARM_MAX
1752 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1753 if (SSE_REGNO_P (regno) && TARGET_SSE)
1755 /* RAX is used as hidden argument to va_arg functions. */
1758 for (i = 0; i < REGPARM_MAX; i++)
1759 if (regno == x86_64_int_parameter_registers[i])
1764 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1765 for a call to a function whose data type is FNTYPE.
1766 For a library call, FNTYPE is 0. */
1769 init_cumulative_args (cum, fntype, libname, fndecl)
1770 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1771 tree fntype; /* tree ptr for function decl */
1772 rtx libname; /* SYMBOL_REF of library name or 0 */
1775 static CUMULATIVE_ARGS zero_cum;
1776 tree param, next_param;
1777 bool user_convention = false;
1779 if (TARGET_DEBUG_ARG)
1781 fprintf (stderr, "\ninit_cumulative_args (");
1783 fprintf (stderr, "fntype code = %s, ret code = %s",
1784 tree_code_name[(int) TREE_CODE (fntype)],
1785 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1787 fprintf (stderr, "no fntype");
1790 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1795 /* Set up the number of registers to use for passing arguments. */
1796 cum->nregs = ix86_regparm;
1797 cum->sse_nregs = SSE_REGPARM_MAX;
1798 if (fntype && !TARGET_64BIT)
1800 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1804 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1805 user_convention = true;
1808 cum->maybe_vaarg = false;
1810 /* Use ecx and edx registers if function has fastcall attribute */
1811 if (fntype && !TARGET_64BIT)
1813 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1817 user_convention = true;
1821 /* Use register calling convention for local functions when possible. */
1822 if (!TARGET_64BIT && !user_convention && fndecl
1823 && flag_unit_at_a_time)
1825 struct cgraph_local_info *i = cgraph_local_info (fndecl);
1828 /* We can't use regparm(3) for nested functions as these use
1829 static chain pointer in third argument. */
1830 if (DECL_CONTEXT (fndecl) && !DECL_NO_STATIC_CHAIN (fndecl))
1838 /* Determine if this function has variable arguments. This is
1839 indicated by the last argument being 'void_type_mode' if there
1840 are no variable arguments. If there are variable arguments, then
1841 we won't pass anything in registers */
1845 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1846 param != 0; param = next_param)
1848 next_param = TREE_CHAIN (param);
1849 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1856 cum->maybe_vaarg = true;
1860 if ((!fntype && !libname)
1861 || (fntype && !TYPE_ARG_TYPES (fntype)))
1862 cum->maybe_vaarg = 1;
1864 if (TARGET_DEBUG_ARG)
1865 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1870 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
1871 of this code is to classify each 8bytes of incoming argument by the register
1872 class and assign registers accordingly. */
1874 /* Return the union class of CLASS1 and CLASS2.
1875 See the x86-64 PS ABI for details. */
1877 static enum x86_64_reg_class
1878 merge_classes (class1, class2)
1879 enum x86_64_reg_class class1, class2;
1881 /* Rule #1: If both classes are equal, this is the resulting class. */
1882 if (class1 == class2)
1885 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1887 if (class1 == X86_64_NO_CLASS)
1889 if (class2 == X86_64_NO_CLASS)
1892 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1893 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1894 return X86_64_MEMORY_CLASS;
1896 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1897 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1898 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1899 return X86_64_INTEGERSI_CLASS;
1900 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1901 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1902 return X86_64_INTEGER_CLASS;
1904 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1905 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1906 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1907 return X86_64_MEMORY_CLASS;
1909 /* Rule #6: Otherwise class SSE is used. */
1910 return X86_64_SSE_CLASS;
1913 /* Classify the argument of type TYPE and mode MODE.
1914 CLASSES will be filled by the register class used to pass each word
1915 of the operand. The number of words is returned. In case the parameter
1916 should be passed in memory, 0 is returned. As a special case for zero
1917 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1919 BIT_OFFSET is used internally for handling records and specifies offset
1920 of the offset in bits modulo 256 to avoid overflow cases.
1922 See the x86-64 PS ABI for details.
1926 classify_argument (mode, type, classes, bit_offset)
1927 enum machine_mode mode;
1929 enum x86_64_reg_class classes[MAX_CLASSES];
1933 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1934 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1936 /* Variable sized entities are always passed/returned in memory. */
1940 if (mode != VOIDmode
1941 && MUST_PASS_IN_STACK (mode, type))
1944 if (type && AGGREGATE_TYPE_P (type))
1948 enum x86_64_reg_class subclasses[MAX_CLASSES];
1950 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1954 for (i = 0; i < words; i++)
1955 classes[i] = X86_64_NO_CLASS;
1957 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1958 signalize memory class, so handle it as special case. */
1961 classes[0] = X86_64_NO_CLASS;
1965 /* Classify each field of record and merge classes. */
1966 if (TREE_CODE (type) == RECORD_TYPE)
1968 /* For classes first merge in the field of the subclasses. */
1969 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1971 tree bases = TYPE_BINFO_BASETYPES (type);
1972 int n_bases = TREE_VEC_LENGTH (bases);
1975 for (i = 0; i < n_bases; ++i)
1977 tree binfo = TREE_VEC_ELT (bases, i);
1979 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1980 tree type = BINFO_TYPE (binfo);
1982 num = classify_argument (TYPE_MODE (type),
1984 (offset + bit_offset) % 256);
1987 for (i = 0; i < num; i++)
1989 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1991 merge_classes (subclasses[i], classes[i + pos]);
1995 /* And now merge the fields of structure. */
1996 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1998 if (TREE_CODE (field) == FIELD_DECL)
2002 /* Bitfields are always classified as integer. Handle them
2003 early, since later code would consider them to be
2004 misaligned integers. */
2005 if (DECL_BIT_FIELD (field))
2007 for (i = int_bit_position (field) / 8 / 8;
2008 i < (int_bit_position (field)
2009 + tree_low_cst (DECL_SIZE (field), 0)
2012 merge_classes (X86_64_INTEGER_CLASS,
2017 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2018 TREE_TYPE (field), subclasses,
2019 (int_bit_position (field)
2020 + bit_offset) % 256);
2023 for (i = 0; i < num; i++)
2026 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2028 merge_classes (subclasses[i], classes[i + pos]);
2034 /* Arrays are handled as small records. */
2035 else if (TREE_CODE (type) == ARRAY_TYPE)
2038 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2039 TREE_TYPE (type), subclasses, bit_offset);
2043 /* The partial classes are now full classes. */
2044 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2045 subclasses[0] = X86_64_SSE_CLASS;
2046 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2047 subclasses[0] = X86_64_INTEGER_CLASS;
2049 for (i = 0; i < words; i++)
2050 classes[i] = subclasses[i % num];
2052 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2053 else if (TREE_CODE (type) == UNION_TYPE
2054 || TREE_CODE (type) == QUAL_UNION_TYPE)
2056 /* For classes first merge in the field of the subclasses. */
2057 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2059 tree bases = TYPE_BINFO_BASETYPES (type);
2060 int n_bases = TREE_VEC_LENGTH (bases);
2063 for (i = 0; i < n_bases; ++i)
2065 tree binfo = TREE_VEC_ELT (bases, i);
2067 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2068 tree type = BINFO_TYPE (binfo);
2070 num = classify_argument (TYPE_MODE (type),
2072 (offset + (bit_offset % 64)) % 256);
2075 for (i = 0; i < num; i++)
2077 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2079 merge_classes (subclasses[i], classes[i + pos]);
2083 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2085 if (TREE_CODE (field) == FIELD_DECL)
2088 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2089 TREE_TYPE (field), subclasses,
2093 for (i = 0; i < num; i++)
2094 classes[i] = merge_classes (subclasses[i], classes[i]);
2101 /* Final merger cleanup. */
2102 for (i = 0; i < words; i++)
2104 /* If one class is MEMORY, everything should be passed in
2106 if (classes[i] == X86_64_MEMORY_CLASS)
2109 /* The X86_64_SSEUP_CLASS should be always preceded by
2110 X86_64_SSE_CLASS. */
2111 if (classes[i] == X86_64_SSEUP_CLASS
2112 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2113 classes[i] = X86_64_SSE_CLASS;
2115 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2116 if (classes[i] == X86_64_X87UP_CLASS
2117 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2118 classes[i] = X86_64_SSE_CLASS;
2123 /* Compute alignment needed. We align all types to natural boundaries with
2124 exception of XFmode that is aligned to 64bits. */
2125 if (mode != VOIDmode && mode != BLKmode)
2127 int mode_alignment = GET_MODE_BITSIZE (mode);
2130 mode_alignment = 128;
2131 else if (mode == XCmode)
2132 mode_alignment = 256;
2133 /* Misaligned fields are always returned in memory. */
2134 if (bit_offset % mode_alignment)
2138 /* Classification of atomic types. */
2148 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2149 classes[0] = X86_64_INTEGERSI_CLASS;
2151 classes[0] = X86_64_INTEGER_CLASS;
2155 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2158 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2159 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2162 if (!(bit_offset % 64))
2163 classes[0] = X86_64_SSESF_CLASS;
2165 classes[0] = X86_64_SSE_CLASS;
2168 classes[0] = X86_64_SSEDF_CLASS;
2171 classes[0] = X86_64_X87_CLASS;
2172 classes[1] = X86_64_X87UP_CLASS;
2175 classes[0] = X86_64_X87_CLASS;
2176 classes[1] = X86_64_X87UP_CLASS;
2177 classes[2] = X86_64_X87_CLASS;
2178 classes[3] = X86_64_X87UP_CLASS;
2181 classes[0] = X86_64_SSEDF_CLASS;
2182 classes[1] = X86_64_SSEDF_CLASS;
2185 classes[0] = X86_64_SSE_CLASS;
2193 classes[0] = X86_64_SSE_CLASS;
2194 classes[1] = X86_64_SSEUP_CLASS;
2209 /* Examine the argument and return set number of register required in each
2210 class. Return 0 iff parameter should be passed in memory. */
2212 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
2213 enum machine_mode mode;
2215 int *int_nregs, *sse_nregs;
2218 enum x86_64_reg_class class[MAX_CLASSES];
2219 int n = classify_argument (mode, type, class, 0);
2225 for (n--; n >= 0; n--)
2228 case X86_64_INTEGER_CLASS:
2229 case X86_64_INTEGERSI_CLASS:
2232 case X86_64_SSE_CLASS:
2233 case X86_64_SSESF_CLASS:
2234 case X86_64_SSEDF_CLASS:
2237 case X86_64_NO_CLASS:
2238 case X86_64_SSEUP_CLASS:
2240 case X86_64_X87_CLASS:
2241 case X86_64_X87UP_CLASS:
2245 case X86_64_MEMORY_CLASS:
2250 /* Construct container for the argument used by GCC interface. See
2251 FUNCTION_ARG for the detailed description. */
2253 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
2254 enum machine_mode mode;
2257 int nintregs, nsseregs;
2261 enum machine_mode tmpmode;
2263 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2264 enum x86_64_reg_class class[MAX_CLASSES];
2268 int needed_sseregs, needed_intregs;
2269 rtx exp[MAX_CLASSES];
2272 n = classify_argument (mode, type, class, 0);
2273 if (TARGET_DEBUG_ARG)
2276 fprintf (stderr, "Memory class\n");
2279 fprintf (stderr, "Classes:");
2280 for (i = 0; i < n; i++)
2282 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2284 fprintf (stderr, "\n");
2289 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2291 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2294 /* First construct simple cases. Avoid SCmode, since we want to use
2295 single register to pass this type. */
2296 if (n == 1 && mode != SCmode)
2299 case X86_64_INTEGER_CLASS:
2300 case X86_64_INTEGERSI_CLASS:
2301 return gen_rtx_REG (mode, intreg[0]);
2302 case X86_64_SSE_CLASS:
2303 case X86_64_SSESF_CLASS:
2304 case X86_64_SSEDF_CLASS:
2305 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2306 case X86_64_X87_CLASS:
2307 return gen_rtx_REG (mode, FIRST_STACK_REG);
2308 case X86_64_NO_CLASS:
2309 /* Zero sized array, struct or class. */
2314 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
2315 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2317 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2318 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
2319 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2320 && class[1] == X86_64_INTEGER_CLASS
2321 && (mode == CDImode || mode == TImode)
2322 && intreg[0] + 1 == intreg[1])
2323 return gen_rtx_REG (mode, intreg[0]);
2325 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2326 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2327 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
2329 /* Otherwise figure out the entries of the PARALLEL. */
2330 for (i = 0; i < n; i++)
2334 case X86_64_NO_CLASS:
2336 case X86_64_INTEGER_CLASS:
2337 case X86_64_INTEGERSI_CLASS:
2338 /* Merge TImodes on aligned occasions here too. */
2339 if (i * 8 + 8 > bytes)
2340 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2341 else if (class[i] == X86_64_INTEGERSI_CLASS)
2345 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2346 if (tmpmode == BLKmode)
2348 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2349 gen_rtx_REG (tmpmode, *intreg),
2353 case X86_64_SSESF_CLASS:
2354 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2355 gen_rtx_REG (SFmode,
2356 SSE_REGNO (sse_regno)),
2360 case X86_64_SSEDF_CLASS:
2361 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2362 gen_rtx_REG (DFmode,
2363 SSE_REGNO (sse_regno)),
2367 case X86_64_SSE_CLASS:
2368 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2372 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2373 gen_rtx_REG (tmpmode,
2374 SSE_REGNO (sse_regno)),
2376 if (tmpmode == TImode)
2384 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2385 for (i = 0; i < nexps; i++)
2386 XVECEXP (ret, 0, i) = exp [i];
2390 /* Update the data in CUM to advance over an argument
2391 of mode MODE and data type TYPE.
2392 (TYPE is null for libcalls where that information may not be available.) */
2395 function_arg_advance (cum, mode, type, named)
2396 CUMULATIVE_ARGS *cum; /* current arg information */
2397 enum machine_mode mode; /* current arg mode */
2398 tree type; /* type of the argument or 0 if lib support */
2399 int named; /* whether or not the argument was named */
2402 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2403 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2405 if (TARGET_DEBUG_ARG)
2407 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2408 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2411 int int_nregs, sse_nregs;
2412 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2413 cum->words += words;
2414 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2416 cum->nregs -= int_nregs;
2417 cum->sse_nregs -= sse_nregs;
2418 cum->regno += int_nregs;
2419 cum->sse_regno += sse_nregs;
2422 cum->words += words;
2426 if (TARGET_SSE && mode == TImode)
2428 cum->sse_words += words;
2429 cum->sse_nregs -= 1;
2430 cum->sse_regno += 1;
2431 if (cum->sse_nregs <= 0)
2439 cum->words += words;
2440 cum->nregs -= words;
2441 cum->regno += words;
2443 if (cum->nregs <= 0)
2453 /* Define where to put the arguments to a function.
2454 Value is zero to push the argument on the stack,
2455 or a hard register in which to store the argument.
2457 MODE is the argument's machine mode.
2458 TYPE is the data type of the argument (as a tree).
2459 This is null for libcalls where that information may
2461 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2462 the preceding args and about the function being called.
2463 NAMED is nonzero if this argument is a named parameter
2464 (otherwise it is an extra parameter matching an ellipsis). */
2467 function_arg (cum, mode, type, named)
2468 CUMULATIVE_ARGS *cum; /* current arg information */
2469 enum machine_mode mode; /* current arg mode */
2470 tree type; /* type of the argument or 0 if lib support */
2471 int named; /* != 0 for normal args, == 0 for ... args */
2475 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2476 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2478 /* Handle a hidden AL argument containing number of registers for varargs
2479 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2481 if (mode == VOIDmode)
2484 return GEN_INT (cum->maybe_vaarg
2485 ? (cum->sse_nregs < 0
2493 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2494 &x86_64_int_parameter_registers [cum->regno],
2499 /* For now, pass fp/complex values on the stack. */
2511 if (words <= cum->nregs)
2513 int regno = cum->regno;
2515 /* Fastcall allocates the first two DWORD (SImode) or
2516 smaller arguments to ECX and EDX. */
2519 if (mode == BLKmode || mode == DImode)
2522 /* ECX not EAX is the first allocated register. */
2526 ret = gen_rtx_REG (mode, regno);
2531 ret = gen_rtx_REG (mode, cum->sse_regno);
2535 if (TARGET_DEBUG_ARG)
2538 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2539 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2542 print_simple_rtl (stderr, ret);
2544 fprintf (stderr, ", stack");
2546 fprintf (stderr, " )\n");
2552 /* A C expression that indicates when an argument must be passed by
2553 reference. If nonzero for an argument, a copy of that argument is
2554 made in memory and a pointer to the argument is passed instead of
2555 the argument itself. The pointer is passed in whatever way is
2556 appropriate for passing a pointer to that type. */
2559 function_arg_pass_by_reference (cum, mode, type, named)
2560 CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
2561 enum machine_mode mode ATTRIBUTE_UNUSED;
2563 int named ATTRIBUTE_UNUSED;
2568 if (type && int_size_in_bytes (type) == -1)
2570 if (TARGET_DEBUG_ARG)
2571 fprintf (stderr, "function_arg_pass_by_reference\n");
2578 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
2581 contains_128bit_aligned_vector_p (type)
2584 enum machine_mode mode = TYPE_MODE (type);
2585 if (SSE_REG_MODE_P (mode)
2586 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2588 if (TYPE_ALIGN (type) < 128)
2591 if (AGGREGATE_TYPE_P (type))
2593 /* Walk the agregates recursivly. */
2594 if (TREE_CODE (type) == RECORD_TYPE
2595 || TREE_CODE (type) == UNION_TYPE
2596 || TREE_CODE (type) == QUAL_UNION_TYPE)
2600 if (TYPE_BINFO (type) != NULL
2601 && TYPE_BINFO_BASETYPES (type) != NULL)
2603 tree bases = TYPE_BINFO_BASETYPES (type);
2604 int n_bases = TREE_VEC_LENGTH (bases);
2607 for (i = 0; i < n_bases; ++i)
2609 tree binfo = TREE_VEC_ELT (bases, i);
2610 tree type = BINFO_TYPE (binfo);
2612 if (contains_128bit_aligned_vector_p (type))
2616 /* And now merge the fields of structure. */
2617 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2619 if (TREE_CODE (field) == FIELD_DECL
2620 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2624 /* Just for use if some languages passes arrays by value. */
2625 else if (TREE_CODE (type) == ARRAY_TYPE)
2627 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2636 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2640 ix86_function_arg_boundary (mode, type)
2641 enum machine_mode mode;
2646 align = TYPE_ALIGN (type);
2648 align = GET_MODE_ALIGNMENT (mode);
2649 if (align < PARM_BOUNDARY)
2650 align = PARM_BOUNDARY;
2653 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2654 make an exception for SSE modes since these require 128bit
2657 The handling here differs from field_alignment. ICC aligns MMX
2658 arguments to 4 byte boundaries, while structure fields are aligned
2659 to 8 byte boundaries. */
2662 if (!SSE_REG_MODE_P (mode))
2663 align = PARM_BOUNDARY;
2667 if (!contains_128bit_aligned_vector_p (type))
2668 align = PARM_BOUNDARY;
2670 if (align != PARM_BOUNDARY && !TARGET_SSE)
2678 /* Return true if N is a possible register number of function value. */
2680 ix86_function_value_regno_p (regno)
2685 return ((regno) == 0
2686 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2687 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2689 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2690 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2691 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2694 /* Define how to find the value returned by a function.
2695 VALTYPE is the data type of the value (as a tree).
2696 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2697 otherwise, FUNC is 0. */
2699 ix86_function_value (valtype)
2704 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2705 REGPARM_MAX, SSE_REGPARM_MAX,
2706 x86_64_int_return_registers, 0);
2707 /* For zero sized structures, construct_container return NULL, but we need
2708 to keep rest of compiler happy by returning meaningful value. */
2710 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2714 return gen_rtx_REG (TYPE_MODE (valtype),
2715 ix86_value_regno (TYPE_MODE (valtype)));
2718 /* Return false iff type is returned in memory. */
2720 ix86_return_in_memory (type)
2723 int needed_intregs, needed_sseregs;
2726 return !examine_argument (TYPE_MODE (type), type, 1,
2727 &needed_intregs, &needed_sseregs);
2731 if (TYPE_MODE (type) == BLKmode)
2733 else if (MS_AGGREGATE_RETURN
2734 && AGGREGATE_TYPE_P (type)
2735 && int_size_in_bytes(type) <= 8)
2737 else if ((VECTOR_MODE_P (TYPE_MODE (type))
2738 && int_size_in_bytes (type) == 8)
2739 || (int_size_in_bytes (type) > 12
2740 && TYPE_MODE (type) != TImode
2741 && TYPE_MODE (type) != TFmode
2742 && !VECTOR_MODE_P (TYPE_MODE (type))))
2748 /* Define how to find the value returned by a library function
2749 assuming the value has mode MODE. */
2751 ix86_libcall_value (mode)
2752 enum machine_mode mode;
2762 return gen_rtx_REG (mode, FIRST_SSE_REG);
2765 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2767 return gen_rtx_REG (mode, 0);
2771 return gen_rtx_REG (mode, ix86_value_regno (mode));
2774 /* Given a mode, return the register to use for a return value. */
2777 ix86_value_regno (mode)
2778 enum machine_mode mode;
2780 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2781 return FIRST_FLOAT_REG;
2782 if (mode == TImode || VECTOR_MODE_P (mode))
2783 return FIRST_SSE_REG;
2787 /* Create the va_list data type. */
2790 ix86_build_va_list ()
2792 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2794 /* For i386 we use plain pointer to argument area. */
2796 return build_pointer_type (char_type_node);
2798 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2799 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2801 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2802 unsigned_type_node);
2803 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2804 unsigned_type_node);
2805 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2807 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2810 DECL_FIELD_CONTEXT (f_gpr) = record;
2811 DECL_FIELD_CONTEXT (f_fpr) = record;
2812 DECL_FIELD_CONTEXT (f_ovf) = record;
2813 DECL_FIELD_CONTEXT (f_sav) = record;
2815 TREE_CHAIN (record) = type_decl;
2816 TYPE_NAME (record) = type_decl;
2817 TYPE_FIELDS (record) = f_gpr;
2818 TREE_CHAIN (f_gpr) = f_fpr;
2819 TREE_CHAIN (f_fpr) = f_ovf;
2820 TREE_CHAIN (f_ovf) = f_sav;
2822 layout_type (record);
2824 /* The correct type is an array type of one element. */
2825 return build_array_type (record, build_index_type (size_zero_node));
2828 /* Perform any needed actions needed for a function that is receiving a
2829 variable number of arguments.
2833 MODE and TYPE are the mode and type of the current parameter.
2835 PRETEND_SIZE is a variable that should be set to the amount of stack
2836 that must be pushed by the prolog to pretend that our caller pushed
2839 Normally, this macro will push all remaining incoming registers on the
2840 stack and set PRETEND_SIZE to the length of the registers pushed. */
2843 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2844 CUMULATIVE_ARGS *cum;
2845 enum machine_mode mode;
2847 int *pretend_size ATTRIBUTE_UNUSED;
2851 CUMULATIVE_ARGS next_cum;
2852 rtx save_area = NULL_RTX, mem;
2865 /* Indicate to allocate space on the stack for varargs save area. */
2866 ix86_save_varrargs_registers = 1;
2868 fntype = TREE_TYPE (current_function_decl);
2869 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2870 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2871 != void_type_node));
2873 /* For varargs, we do not want to skip the dummy va_dcl argument.
2874 For stdargs, we do want to skip the last named argument. */
2877 function_arg_advance (&next_cum, mode, type, 1);
2880 save_area = frame_pointer_rtx;
2882 set = get_varargs_alias_set ();
2884 for (i = next_cum.regno; i < ix86_regparm; i++)
2886 mem = gen_rtx_MEM (Pmode,
2887 plus_constant (save_area, i * UNITS_PER_WORD));
2888 set_mem_alias_set (mem, set);
2889 emit_move_insn (mem, gen_rtx_REG (Pmode,
2890 x86_64_int_parameter_registers[i]));
2893 if (next_cum.sse_nregs)
2895 /* Now emit code to save SSE registers. The AX parameter contains number
2896 of SSE parameter registers used to call this function. We use
2897 sse_prologue_save insn template that produces computed jump across
2898 SSE saves. We need some preparation work to get this working. */
2900 label = gen_label_rtx ();
2901 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2903 /* Compute address to jump to :
2904 label - 5*eax + nnamed_sse_arguments*5 */
2905 tmp_reg = gen_reg_rtx (Pmode);
2906 nsse_reg = gen_reg_rtx (Pmode);
2907 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2908 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2909 gen_rtx_MULT (Pmode, nsse_reg,
2911 if (next_cum.sse_regno)
2914 gen_rtx_CONST (DImode,
2915 gen_rtx_PLUS (DImode,
2917 GEN_INT (next_cum.sse_regno * 4))));
2919 emit_move_insn (nsse_reg, label_ref);
2920 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2922 /* Compute address of memory block we save into. We always use pointer
2923 pointing 127 bytes after first byte to store - this is needed to keep
2924 instruction size limited by 4 bytes. */
2925 tmp_reg = gen_reg_rtx (Pmode);
2926 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2927 plus_constant (save_area,
2928 8 * REGPARM_MAX + 127)));
2929 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2930 set_mem_alias_set (mem, set);
2931 set_mem_align (mem, BITS_PER_WORD);
2933 /* And finally do the dirty job! */
2934 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2935 GEN_INT (next_cum.sse_regno), label));
2940 /* Implement va_start. */
2943 ix86_va_start (valist, nextarg)
2947 HOST_WIDE_INT words, n_gpr, n_fpr;
2948 tree f_gpr, f_fpr, f_ovf, f_sav;
2949 tree gpr, fpr, ovf, sav, t;
2951 /* Only 64bit target needs something special. */
2954 std_expand_builtin_va_start (valist, nextarg);
2958 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2959 f_fpr = TREE_CHAIN (f_gpr);
2960 f_ovf = TREE_CHAIN (f_fpr);
2961 f_sav = TREE_CHAIN (f_ovf);
2963 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2964 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2965 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2966 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2967 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2969 /* Count number of gp and fp argument registers used. */
2970 words = current_function_args_info.words;
2971 n_gpr = current_function_args_info.regno;
2972 n_fpr = current_function_args_info.sse_regno;
2974 if (TARGET_DEBUG_ARG)
2975 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2976 (int) words, (int) n_gpr, (int) n_fpr);
2978 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2979 build_int_2 (n_gpr * 8, 0));
2980 TREE_SIDE_EFFECTS (t) = 1;
2981 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2983 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2984 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2985 TREE_SIDE_EFFECTS (t) = 1;
2986 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2988 /* Find the overflow area. */
2989 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2991 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2992 build_int_2 (words * UNITS_PER_WORD, 0));
2993 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2994 TREE_SIDE_EFFECTS (t) = 1;
2995 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2997 /* Find the register save area.
2998 Prologue of the function save it right above stack frame. */
2999 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3000 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3001 TREE_SIDE_EFFECTS (t) = 1;
3002 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3005 /* Implement va_arg. */
3007 ix86_va_arg (valist, type)
3010 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3011 tree f_gpr, f_fpr, f_ovf, f_sav;
3012 tree gpr, fpr, ovf, sav, t;
3014 rtx lab_false, lab_over = NULL_RTX;
3019 /* Only 64bit target needs something special. */
3022 return std_expand_builtin_va_arg (valist, type);
3025 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3026 f_fpr = TREE_CHAIN (f_gpr);
3027 f_ovf = TREE_CHAIN (f_fpr);
3028 f_sav = TREE_CHAIN (f_ovf);
3030 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3031 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3032 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3033 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3034 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3036 size = int_size_in_bytes (type);
3039 /* Passed by reference. */
3041 type = build_pointer_type (type);
3042 size = int_size_in_bytes (type);
3044 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3046 container = construct_container (TYPE_MODE (type), type, 0,
3047 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3049 * Pull the value out of the saved registers ...
3052 addr_rtx = gen_reg_rtx (Pmode);
3056 rtx int_addr_rtx, sse_addr_rtx;
3057 int needed_intregs, needed_sseregs;
3060 lab_over = gen_label_rtx ();
3061 lab_false = gen_label_rtx ();
3063 examine_argument (TYPE_MODE (type), type, 0,
3064 &needed_intregs, &needed_sseregs);
3067 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
3068 || TYPE_ALIGN (type) > 128);
3070 /* In case we are passing structure, verify that it is consecutive block
3071 on the register save area. If not we need to do moves. */
3072 if (!need_temp && !REG_P (container))
3074 /* Verify that all registers are strictly consecutive */
3075 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3079 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3081 rtx slot = XVECEXP (container, 0, i);
3082 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3083 || INTVAL (XEXP (slot, 1)) != i * 16)
3091 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3093 rtx slot = XVECEXP (container, 0, i);
3094 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3095 || INTVAL (XEXP (slot, 1)) != i * 8)
3102 int_addr_rtx = addr_rtx;
3103 sse_addr_rtx = addr_rtx;
3107 int_addr_rtx = gen_reg_rtx (Pmode);
3108 sse_addr_rtx = gen_reg_rtx (Pmode);
3110 /* First ensure that we fit completely in registers. */
3113 emit_cmp_and_jump_insns (expand_expr
3114 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
3115 GEN_INT ((REGPARM_MAX - needed_intregs +
3116 1) * 8), GE, const1_rtx, SImode,
3121 emit_cmp_and_jump_insns (expand_expr
3122 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
3123 GEN_INT ((SSE_REGPARM_MAX -
3124 needed_sseregs + 1) * 16 +
3125 REGPARM_MAX * 8), GE, const1_rtx,
3126 SImode, 1, lab_false);
3129 /* Compute index to start of area used for integer regs. */
3132 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
3133 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
3134 if (r != int_addr_rtx)
3135 emit_move_insn (int_addr_rtx, r);
3139 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
3140 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
3141 if (r != sse_addr_rtx)
3142 emit_move_insn (sse_addr_rtx, r);
3149 /* Never use the memory itself, as it has the alias set. */
3150 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
3151 mem = gen_rtx_MEM (BLKmode, addr_rtx);
3152 set_mem_alias_set (mem, get_varargs_alias_set ());
3153 set_mem_align (mem, BITS_PER_UNIT);
3155 for (i = 0; i < XVECLEN (container, 0); i++)
3157 rtx slot = XVECEXP (container, 0, i);
3158 rtx reg = XEXP (slot, 0);
3159 enum machine_mode mode = GET_MODE (reg);
3165 if (SSE_REGNO_P (REGNO (reg)))
3167 src_addr = sse_addr_rtx;
3168 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3172 src_addr = int_addr_rtx;
3173 src_offset = REGNO (reg) * 8;
3175 src_mem = gen_rtx_MEM (mode, src_addr);
3176 set_mem_alias_set (src_mem, get_varargs_alias_set ());
3177 src_mem = adjust_address (src_mem, mode, src_offset);
3178 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
3179 emit_move_insn (dest_mem, src_mem);
3186 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3187 build_int_2 (needed_intregs * 8, 0));
3188 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3189 TREE_SIDE_EFFECTS (t) = 1;
3190 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3195 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3196 build_int_2 (needed_sseregs * 16, 0));
3197 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3198 TREE_SIDE_EFFECTS (t) = 1;
3199 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3202 emit_jump_insn (gen_jump (lab_over));
3204 emit_label (lab_false);
3207 /* ... otherwise out of the overflow area. */
3209 /* Care for on-stack alignment if needed. */
3210 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3214 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3215 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3216 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3220 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3222 emit_move_insn (addr_rtx, r);
3225 build (PLUS_EXPR, TREE_TYPE (t), t,
3226 build_int_2 (rsize * UNITS_PER_WORD, 0));
3227 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3228 TREE_SIDE_EFFECTS (t) = 1;
3229 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3232 emit_label (lab_over);
3236 r = gen_rtx_MEM (Pmode, addr_rtx);
3237 set_mem_alias_set (r, get_varargs_alias_set ());
3238 emit_move_insn (addr_rtx, r);
3244 /* Return nonzero if OP is either a i387 or SSE fp register. */
3246 any_fp_register_operand (op, mode)
3248 enum machine_mode mode ATTRIBUTE_UNUSED;
3250 return ANY_FP_REG_P (op);
3253 /* Return nonzero if OP is an i387 fp register. */
3255 fp_register_operand (op, mode)
3257 enum machine_mode mode ATTRIBUTE_UNUSED;
3259 return FP_REG_P (op);
3262 /* Return nonzero if OP is a non-fp register_operand. */
3264 register_and_not_any_fp_reg_operand (op, mode)
3266 enum machine_mode mode;
3268 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3271 /* Return nonzero if OP is a register operand other than an
3272 i387 fp register. */
3274 register_and_not_fp_reg_operand (op, mode)
3276 enum machine_mode mode;
3278 return register_operand (op, mode) && !FP_REG_P (op);
3281 /* Return nonzero if OP is general operand representable on x86_64. */
3284 x86_64_general_operand (op, mode)
3286 enum machine_mode mode;
3289 return general_operand (op, mode);
3290 if (nonimmediate_operand (op, mode))
3292 return x86_64_sign_extended_value (op);
3295 /* Return nonzero if OP is general operand representable on x86_64
3296 as either sign extended or zero extended constant. */
3299 x86_64_szext_general_operand (op, mode)
3301 enum machine_mode mode;
3304 return general_operand (op, mode);
3305 if (nonimmediate_operand (op, mode))
3307 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3310 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3313 x86_64_nonmemory_operand (op, mode)
3315 enum machine_mode mode;
3318 return nonmemory_operand (op, mode);
3319 if (register_operand (op, mode))
3321 return x86_64_sign_extended_value (op);
3324 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
3327 x86_64_movabs_operand (op, mode)
3329 enum machine_mode mode;
3331 if (!TARGET_64BIT || !flag_pic)
3332 return nonmemory_operand (op, mode);
3333 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3335 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3340 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3343 x86_64_szext_nonmemory_operand (op, mode)
3345 enum machine_mode mode;
3348 return nonmemory_operand (op, mode);
3349 if (register_operand (op, mode))
3351 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3354 /* Return nonzero if OP is immediate operand representable on x86_64. */
3357 x86_64_immediate_operand (op, mode)
3359 enum machine_mode mode;
3362 return immediate_operand (op, mode);
3363 return x86_64_sign_extended_value (op);
3366 /* Return nonzero if OP is immediate operand representable on x86_64. */
3369 x86_64_zext_immediate_operand (op, mode)
3371 enum machine_mode mode ATTRIBUTE_UNUSED;
3373 return x86_64_zero_extended_value (op);
3376 /* Return nonzero if OP is (const_int 1), else return zero. */
3379 const_int_1_operand (op, mode)
3381 enum machine_mode mode ATTRIBUTE_UNUSED;
3383 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
3386 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3387 for shift & compare patterns, as shifting by 0 does not change flags),
3388 else return zero. */
3391 const_int_1_31_operand (op, mode)
3393 enum machine_mode mode ATTRIBUTE_UNUSED;
3395 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3398 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3399 reference and a constant. */
3402 symbolic_operand (op, mode)
3404 enum machine_mode mode ATTRIBUTE_UNUSED;
3406 switch (GET_CODE (op))
3414 if (GET_CODE (op) == SYMBOL_REF
3415 || GET_CODE (op) == LABEL_REF
3416 || (GET_CODE (op) == UNSPEC
3417 && (XINT (op, 1) == UNSPEC_GOT
3418 || XINT (op, 1) == UNSPEC_GOTOFF
3419 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3421 if (GET_CODE (op) != PLUS
3422 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3426 if (GET_CODE (op) == SYMBOL_REF
3427 || GET_CODE (op) == LABEL_REF)
3429 /* Only @GOTOFF gets offsets. */
3430 if (GET_CODE (op) != UNSPEC
3431 || XINT (op, 1) != UNSPEC_GOTOFF)
3434 op = XVECEXP (op, 0, 0);
3435 if (GET_CODE (op) == SYMBOL_REF
3436 || GET_CODE (op) == LABEL_REF)
3445 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3448 pic_symbolic_operand (op, mode)
3450 enum machine_mode mode ATTRIBUTE_UNUSED;
3452 if (GET_CODE (op) != CONST)
3457 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
3462 if (GET_CODE (op) == UNSPEC)
3464 if (GET_CODE (op) != PLUS
3465 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3468 if (GET_CODE (op) == UNSPEC)
3474 /* Return true if OP is a symbolic operand that resolves locally. */
3477 local_symbolic_operand (op, mode)
3479 enum machine_mode mode ATTRIBUTE_UNUSED;
3481 if (GET_CODE (op) == CONST
3482 && GET_CODE (XEXP (op, 0)) == PLUS
3483 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3484 op = XEXP (XEXP (op, 0), 0);
3486 if (GET_CODE (op) == LABEL_REF)
3489 if (GET_CODE (op) != SYMBOL_REF)
3492 /* These we've been told are local by varasm and encode_section_info
3494 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
3497 /* There is, however, a not insubstantial body of code in the rest of
3498 the compiler that assumes it can just stick the results of
3499 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3500 /* ??? This is a hack. Should update the body of the compiler to
3501 always create a DECL an invoke targetm.encode_section_info. */
3502 if (strncmp (XSTR (op, 0), internal_label_prefix,
3503 internal_label_prefix_len) == 0)
3509 /* Test for various thread-local symbols. See ix86_encode_section_info. */
3512 tls_symbolic_operand (op, mode)
3514 enum machine_mode mode ATTRIBUTE_UNUSED;
3516 const char *symbol_str;
3518 if (GET_CODE (op) != SYMBOL_REF)
3520 symbol_str = XSTR (op, 0);
3522 if (symbol_str[0] != '%')
3524 return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
3528 tls_symbolic_operand_1 (op, kind)
3530 enum tls_model kind;
3532 const char *symbol_str;
3534 if (GET_CODE (op) != SYMBOL_REF)
3536 symbol_str = XSTR (op, 0);
3538 return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
3542 global_dynamic_symbolic_operand (op, mode)
3544 enum machine_mode mode ATTRIBUTE_UNUSED;
3546 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3550 local_dynamic_symbolic_operand (op, mode)
3552 enum machine_mode mode ATTRIBUTE_UNUSED;
3554 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3558 initial_exec_symbolic_operand (op, mode)
3560 enum machine_mode mode ATTRIBUTE_UNUSED;
3562 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3566 local_exec_symbolic_operand (op, mode)
3568 enum machine_mode mode ATTRIBUTE_UNUSED;
3570 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3573 /* Test for a valid operand for a call instruction. Don't allow the
3574 arg pointer register or virtual regs since they may decay into
3575 reg + const, which the patterns can't handle. */
3578 call_insn_operand (op, mode)
3580 enum machine_mode mode ATTRIBUTE_UNUSED;
3582 /* Disallow indirect through a virtual register. This leads to
3583 compiler aborts when trying to eliminate them. */
3584 if (GET_CODE (op) == REG
3585 && (op == arg_pointer_rtx
3586 || op == frame_pointer_rtx
3587 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3588 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3591 /* Disallow `call 1234'. Due to varying assembler lameness this
3592 gets either rejected or translated to `call .+1234'. */
3593 if (GET_CODE (op) == CONST_INT)
3596 /* Explicitly allow SYMBOL_REF even if pic. */
3597 if (GET_CODE (op) == SYMBOL_REF)
3600 /* Otherwise we can allow any general_operand in the address. */
3601 return general_operand (op, Pmode);
3604 /* Test for a valid operand for a call instruction. Don't allow the
3605 arg pointer register or virtual regs since they may decay into
3606 reg + const, which the patterns can't handle. */
3609 sibcall_insn_operand (op, mode)
3611 enum machine_mode mode ATTRIBUTE_UNUSED;
3613 /* Disallow indirect through a virtual register. This leads to
3614 compiler aborts when trying to eliminate them. */
3615 if (GET_CODE (op) == REG
3616 && (op == arg_pointer_rtx
3617 || op == frame_pointer_rtx
3618 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3619 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3622 /* Explicitly allow SYMBOL_REF even if pic. */
3623 if (GET_CODE (op) == SYMBOL_REF)
3626 /* Otherwise we can only allow register operands. */
3627 return register_operand (op, Pmode);
3631 constant_call_address_operand (op, mode)
3633 enum machine_mode mode ATTRIBUTE_UNUSED;
3635 if (GET_CODE (op) == CONST
3636 && GET_CODE (XEXP (op, 0)) == PLUS
3637 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3638 op = XEXP (XEXP (op, 0), 0);
3639 return GET_CODE (op) == SYMBOL_REF;
3642 /* Match exactly zero and one. */
3645 const0_operand (op, mode)
3647 enum machine_mode mode;
3649 return op == CONST0_RTX (mode);
3653 const1_operand (op, mode)
3655 enum machine_mode mode ATTRIBUTE_UNUSED;
3657 return op == const1_rtx;
3660 /* Match 2, 4, or 8. Used for leal multiplicands. */
3663 const248_operand (op, mode)
3665 enum machine_mode mode ATTRIBUTE_UNUSED;
3667 return (GET_CODE (op) == CONST_INT
3668 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3671 /* True if this is a constant appropriate for an increment or decrement. */
3674 incdec_operand (op, mode)
3676 enum machine_mode mode ATTRIBUTE_UNUSED;
3678 /* On Pentium4, the inc and dec operations causes extra dependency on flag
3679 registers, since carry flag is not set. */
3680 if (TARGET_PENTIUM4 && !optimize_size)
3682 return op == const1_rtx || op == constm1_rtx;
3685 /* Return nonzero if OP is acceptable as operand of DImode shift
3689 shiftdi_operand (op, mode)
3691 enum machine_mode mode ATTRIBUTE_UNUSED;
3694 return nonimmediate_operand (op, mode);
3696 return register_operand (op, mode);
3699 /* Return false if this is the stack pointer, or any other fake
3700 register eliminable to the stack pointer. Otherwise, this is
3703 This is used to prevent esp from being used as an index reg.
3704 Which would only happen in pathological cases. */
3707 reg_no_sp_operand (op, mode)
3709 enum machine_mode mode;
3712 if (GET_CODE (t) == SUBREG)
3714 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3717 return register_operand (op, mode);
3721 mmx_reg_operand (op, mode)
3723 enum machine_mode mode ATTRIBUTE_UNUSED;
3725 return MMX_REG_P (op);
3728 /* Return false if this is any eliminable register. Otherwise
3732 general_no_elim_operand (op, mode)
3734 enum machine_mode mode;
3737 if (GET_CODE (t) == SUBREG)
3739 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3740 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3741 || t == virtual_stack_dynamic_rtx)
3744 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3745 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3748 return general_operand (op, mode);
3751 /* Return false if this is any eliminable register. Otherwise
3752 register_operand or const_int. */
3755 nonmemory_no_elim_operand (op, mode)
3757 enum machine_mode mode;
3760 if (GET_CODE (t) == SUBREG)
3762 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3763 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3764 || t == virtual_stack_dynamic_rtx)
3767 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3770 /* Return false if this is any eliminable register or stack register,
3771 otherwise work like register_operand. */
3774 index_register_operand (op, mode)
3776 enum machine_mode mode;
3779 if (GET_CODE (t) == SUBREG)
3783 if (t == arg_pointer_rtx
3784 || t == frame_pointer_rtx
3785 || t == virtual_incoming_args_rtx
3786 || t == virtual_stack_vars_rtx
3787 || t == virtual_stack_dynamic_rtx
3788 || REGNO (t) == STACK_POINTER_REGNUM)
3791 return general_operand (op, mode);
3794 /* Return true if op is a Q_REGS class register. */
3797 q_regs_operand (op, mode)
3799 enum machine_mode mode;
3801 if (mode != VOIDmode && GET_MODE (op) != mode)
3803 if (GET_CODE (op) == SUBREG)
3804 op = SUBREG_REG (op);
3805 return ANY_QI_REG_P (op);
3808 /* Return true if op is an flags register. */
3811 flags_reg_operand (op, mode)
3813 enum machine_mode mode;
3815 if (mode != VOIDmode && GET_MODE (op) != mode)
3817 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3820 /* Return true if op is a NON_Q_REGS class register. */
3823 non_q_regs_operand (op, mode)
3825 enum machine_mode mode;
3827 if (mode != VOIDmode && GET_MODE (op) != mode)
3829 if (GET_CODE (op) == SUBREG)
3830 op = SUBREG_REG (op);
3831 return NON_QI_REG_P (op);
3835 zero_extended_scalar_load_operand (op, mode)
3837 enum machine_mode mode ATTRIBUTE_UNUSED;
3840 if (GET_CODE (op) != MEM)
3842 op = maybe_get_pool_constant (op);
3845 if (GET_CODE (op) != CONST_VECTOR)
3848 (GET_MODE_SIZE (GET_MODE (op)) /
3849 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
3850 for (n_elts--; n_elts > 0; n_elts--)
3852 rtx elt = CONST_VECTOR_ELT (op, n_elts);
3853 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
3859 /* Return 1 when OP is operand acceptable for standard SSE move. */
3861 vector_move_operand (op, mode)
3863 enum machine_mode mode;
3865 if (nonimmediate_operand (op, mode))
3867 if (GET_MODE (op) != mode && mode != VOIDmode)
3869 return (op == CONST0_RTX (GET_MODE (op)));
3872 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3875 sse_comparison_operator (op, mode)
3877 enum machine_mode mode ATTRIBUTE_UNUSED;
3879 enum rtx_code code = GET_CODE (op);
3882 /* Operations supported directly. */
3892 /* These are equivalent to ones above in non-IEEE comparisons. */
3899 return !TARGET_IEEE_FP;
3904 /* Return 1 if OP is a valid comparison operator in valid mode. */
3906 ix86_comparison_operator (op, mode)
3908 enum machine_mode mode;
3910 enum machine_mode inmode;
3911 enum rtx_code code = GET_CODE (op);
3912 if (mode != VOIDmode && GET_MODE (op) != mode)
3914 if (GET_RTX_CLASS (code) != '<')
3916 inmode = GET_MODE (XEXP (op, 0));
3918 if (inmode == CCFPmode || inmode == CCFPUmode)
3920 enum rtx_code second_code, bypass_code;
3921 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3922 return (bypass_code == NIL && second_code == NIL);
3929 if (inmode == CCmode || inmode == CCGCmode
3930 || inmode == CCGOCmode || inmode == CCNOmode)
3933 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3934 if (inmode == CCmode)
3938 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3946 /* Return 1 if OP is a valid comparison operator testing carry flag
3949 ix86_carry_flag_operator (op, mode)
3951 enum machine_mode mode;
3953 enum machine_mode inmode;
3954 enum rtx_code code = GET_CODE (op);
3956 if (mode != VOIDmode && GET_MODE (op) != mode)
3958 if (GET_RTX_CLASS (code) != '<')
3960 inmode = GET_MODE (XEXP (op, 0));
3961 if (GET_CODE (XEXP (op, 0)) != REG
3962 || REGNO (XEXP (op, 0)) != 17
3963 || XEXP (op, 1) != const0_rtx)
3966 if (inmode == CCFPmode || inmode == CCFPUmode)
3968 enum rtx_code second_code, bypass_code;
3970 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3971 if (bypass_code != NIL || second_code != NIL)
3973 code = ix86_fp_compare_code_to_integer (code);
3975 else if (inmode != CCmode)
3980 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3983 fcmov_comparison_operator (op, mode)
3985 enum machine_mode mode;
3987 enum machine_mode inmode;
3988 enum rtx_code code = GET_CODE (op);
3990 if (mode != VOIDmode && GET_MODE (op) != mode)
3992 if (GET_RTX_CLASS (code) != '<')
3994 inmode = GET_MODE (XEXP (op, 0));
3995 if (inmode == CCFPmode || inmode == CCFPUmode)
3997 enum rtx_code second_code, bypass_code;
3999 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4000 if (bypass_code != NIL || second_code != NIL)
4002 code = ix86_fp_compare_code_to_integer (code);
4004 /* i387 supports just limited amount of conditional codes. */
4007 case LTU: case GTU: case LEU: case GEU:
4008 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
4011 case ORDERED: case UNORDERED:
4019 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
4022 promotable_binary_operator (op, mode)
4024 enum machine_mode mode ATTRIBUTE_UNUSED;
4026 switch (GET_CODE (op))
4029 /* Modern CPUs have same latency for HImode and SImode multiply,
4030 but 386 and 486 do HImode multiply faster. */
4031 return ix86_tune > PROCESSOR_I486;
4043 /* Nearly general operand, but accept any const_double, since we wish
4044 to be able to drop them into memory rather than have them get pulled
4048 cmp_fp_expander_operand (op, mode)
4050 enum machine_mode mode;
4052 if (mode != VOIDmode && mode != GET_MODE (op))
4054 if (GET_CODE (op) == CONST_DOUBLE)
4056 return general_operand (op, mode);
4059 /* Match an SI or HImode register for a zero_extract. */
4062 ext_register_operand (op, mode)
4064 enum machine_mode mode ATTRIBUTE_UNUSED;
4067 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
4068 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
4071 if (!register_operand (op, VOIDmode))
4074 /* Be careful to accept only registers having upper parts. */
4075 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
4076 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
4079 /* Return 1 if this is a valid binary floating-point operation.
4080 OP is the expression matched, and MODE is its mode. */
4083 binary_fp_operator (op, mode)
4085 enum machine_mode mode;
4087 if (mode != VOIDmode && mode != GET_MODE (op))
4090 switch (GET_CODE (op))
4096 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
4104 mult_operator (op, mode)
4106 enum machine_mode mode ATTRIBUTE_UNUSED;
4108 return GET_CODE (op) == MULT;
4112 div_operator (op, mode)
4114 enum machine_mode mode ATTRIBUTE_UNUSED;
4116 return GET_CODE (op) == DIV;
4120 arith_or_logical_operator (op, mode)
4122 enum machine_mode mode;
4124 return ((mode == VOIDmode || GET_MODE (op) == mode)
4125 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
4126 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
4129 /* Returns 1 if OP is memory operand with a displacement. */
4132 memory_displacement_operand (op, mode)
4134 enum machine_mode mode;
4136 struct ix86_address parts;
4138 if (! memory_operand (op, mode))
4141 if (! ix86_decompose_address (XEXP (op, 0), &parts))
4144 return parts.disp != NULL_RTX;
4147 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
4148 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4150 ??? It seems likely that this will only work because cmpsi is an
4151 expander, and no actual insns use this. */
4154 cmpsi_operand (op, mode)
4156 enum machine_mode mode;
4158 if (nonimmediate_operand (op, mode))
4161 if (GET_CODE (op) == AND
4162 && GET_MODE (op) == SImode
4163 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
4164 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
4165 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
4166 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
4167 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
4168 && GET_CODE (XEXP (op, 1)) == CONST_INT)
4174 /* Returns 1 if OP is memory operand that can not be represented by the
4178 long_memory_operand (op, mode)
4180 enum machine_mode mode;
4182 if (! memory_operand (op, mode))
4185 return memory_address_length (op) != 0;
4188 /* Return nonzero if the rtx is known aligned. */
4191 aligned_operand (op, mode)
4193 enum machine_mode mode;
4195 struct ix86_address parts;
4197 if (!general_operand (op, mode))
4200 /* Registers and immediate operands are always "aligned". */
4201 if (GET_CODE (op) != MEM)
4204 /* Don't even try to do any aligned optimizations with volatiles. */
4205 if (MEM_VOLATILE_P (op))
4210 /* Pushes and pops are only valid on the stack pointer. */
4211 if (GET_CODE (op) == PRE_DEC
4212 || GET_CODE (op) == POST_INC)
4215 /* Decode the address. */
4216 if (! ix86_decompose_address (op, &parts))
4219 if (parts.base && GET_CODE (parts.base) == SUBREG)
4220 parts.base = SUBREG_REG (parts.base);
4221 if (parts.index && GET_CODE (parts.index) == SUBREG)
4222 parts.index = SUBREG_REG (parts.index);
4224 /* Look for some component that isn't known to be aligned. */
4228 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
4233 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
4238 if (GET_CODE (parts.disp) != CONST_INT
4239 || (INTVAL (parts.disp) & 3) != 0)
4243 /* Didn't find one -- this must be an aligned address. */
4247 /* Initialize the table of extra 80387 mathematical constants. */
4250 init_ext_80387_constants ()
4252 static const char * cst[5] =
4254 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4255 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4256 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4257 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4258 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4262 for (i = 0; i < 5; i++)
4264 real_from_string (&ext_80387_constants_table[i], cst[i]);
4265 /* Ensure each constant is rounded to XFmode precision. */
4266 real_convert (&ext_80387_constants_table[i], XFmode,
4267 &ext_80387_constants_table[i]);
4270 ext_80387_constants_init = 1;
4273 /* Return true if the constant is something that can be loaded with
4274 a special instruction. */
4277 standard_80387_constant_p (x)
4280 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4283 if (x == CONST0_RTX (GET_MODE (x)))
4285 if (x == CONST1_RTX (GET_MODE (x)))
4288 /* For XFmode constants, try to find a special 80387 instruction on
4289 those CPUs that benefit from them. */
4290 if (GET_MODE (x) == XFmode
4291 && x86_ext_80387_constants & TUNEMASK)
4296 if (! ext_80387_constants_init)
4297 init_ext_80387_constants ();
4299 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4300 for (i = 0; i < 5; i++)
4301 if (real_identical (&r, &ext_80387_constants_table[i]))
4308 /* Return the opcode of the special instruction to be used to load
4312 standard_80387_constant_opcode (x)
4315 switch (standard_80387_constant_p (x))
4335 /* Return the CONST_DOUBLE representing the 80387 constant that is
4336 loaded by the specified special instruction. The argument IDX
4337 matches the return value from standard_80387_constant_p. */
4340 standard_80387_constant_rtx (idx)
4345 if (! ext_80387_constants_init)
4346 init_ext_80387_constants ();
4362 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i], XFmode);
4365 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4368 standard_sse_constant_p (x)
4371 if (x == const0_rtx)
4373 return (x == CONST0_RTX (GET_MODE (x)));
4376 /* Returns 1 if OP contains a symbol reference */
4379 symbolic_reference_mentioned_p (op)
4382 register const char *fmt;
4385 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4388 fmt = GET_RTX_FORMAT (GET_CODE (op));
4389 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4395 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4396 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4400 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4407 /* Return 1 if it is appropriate to emit `ret' instructions in the
4408 body of a function. Do this only if the epilogue is simple, needing a
4409 couple of insns. Prior to reloading, we can't tell how many registers
4410 must be saved, so return 0 then. Return 0 if there is no frame
4411 marker to de-allocate.
4413 If NON_SAVING_SETJMP is defined and true, then it is not possible
4414 for the epilogue to be simple, so return 0. This is a special case
4415 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4416 until final, but jump_optimize may need to know sooner if a
4420 ix86_can_use_return_insn_p ()
4422 struct ix86_frame frame;
4424 #ifdef NON_SAVING_SETJMP
4425 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4429 if (! reload_completed || frame_pointer_needed)
4432 /* Don't allow more than 32 pop, since that's all we can do
4433 with one instruction. */
4434 if (current_function_pops_args
4435 && current_function_args_size >= 32768)
4438 ix86_compute_frame_layout (&frame);
4439 return frame.to_allocate == 0 && frame.nregs == 0;
4442 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
4444 x86_64_sign_extended_value (value)
4447 switch (GET_CODE (value))
4449 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
4450 to be at least 32 and this all acceptable constants are
4451 represented as CONST_INT. */
4453 if (HOST_BITS_PER_WIDE_INT == 32)
4457 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4458 return trunc_int_for_mode (val, SImode) == val;
4462 /* For certain code models, the symbolic references are known to fit.
4463 in CM_SMALL_PIC model we know it fits if it is local to the shared
4464 library. Don't count TLS SYMBOL_REFs here, since they should fit
4465 only if inside of UNSPEC handled below. */
4467 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4469 /* For certain code models, the code is near as well. */
4471 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4472 || ix86_cmodel == CM_KERNEL);
4474 /* We also may accept the offsetted memory references in certain special
4477 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4478 switch (XINT (XEXP (value, 0), 1))
4480 case UNSPEC_GOTPCREL:
4482 case UNSPEC_GOTNTPOFF:
4488 if (GET_CODE (XEXP (value, 0)) == PLUS)
4490 rtx op1 = XEXP (XEXP (value, 0), 0);
4491 rtx op2 = XEXP (XEXP (value, 0), 1);
4492 HOST_WIDE_INT offset;
4494 if (ix86_cmodel == CM_LARGE)
4496 if (GET_CODE (op2) != CONST_INT)
4498 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4499 switch (GET_CODE (op1))
4502 /* For CM_SMALL assume that latest object is 16MB before
4503 end of 31bits boundary. We may also accept pretty
4504 large negative constants knowing that all objects are
4505 in the positive half of address space. */
4506 if (ix86_cmodel == CM_SMALL
4507 && offset < 16*1024*1024
4508 && trunc_int_for_mode (offset, SImode) == offset)
4510 /* For CM_KERNEL we know that all object resist in the
4511 negative half of 32bits address space. We may not
4512 accept negative offsets, since they may be just off
4513 and we may accept pretty large positive ones. */
4514 if (ix86_cmodel == CM_KERNEL
4516 && trunc_int_for_mode (offset, SImode) == offset)
4520 /* These conditions are similar to SYMBOL_REF ones, just the
4521 constraints for code models differ. */
4522 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4523 && offset < 16*1024*1024
4524 && trunc_int_for_mode (offset, SImode) == offset)
4526 if (ix86_cmodel == CM_KERNEL
4528 && trunc_int_for_mode (offset, SImode) == offset)
4532 switch (XINT (op1, 1))
4537 && trunc_int_for_mode (offset, SImode) == offset)
4551 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
4553 x86_64_zero_extended_value (value)
4556 switch (GET_CODE (value))
4559 if (HOST_BITS_PER_WIDE_INT == 32)
4560 return (GET_MODE (value) == VOIDmode
4561 && !CONST_DOUBLE_HIGH (value));
4565 if (HOST_BITS_PER_WIDE_INT == 32)
4566 return INTVAL (value) >= 0;
4568 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4571 /* For certain code models, the symbolic references are known to fit. */
4573 return ix86_cmodel == CM_SMALL;
4575 /* For certain code models, the code is near as well. */
4577 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4579 /* We also may accept the offsetted memory references in certain special
4582 if (GET_CODE (XEXP (value, 0)) == PLUS)
4584 rtx op1 = XEXP (XEXP (value, 0), 0);
4585 rtx op2 = XEXP (XEXP (value, 0), 1);
4587 if (ix86_cmodel == CM_LARGE)
4589 switch (GET_CODE (op1))
4593 /* For small code model we may accept pretty large positive
4594 offsets, since one bit is available for free. Negative
4595 offsets are limited by the size of NULL pointer area
4596 specified by the ABI. */
4597 if (ix86_cmodel == CM_SMALL
4598 && GET_CODE (op2) == CONST_INT
4599 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4600 && (trunc_int_for_mode (INTVAL (op2), SImode)
4603 /* ??? For the kernel, we may accept adjustment of
4604 -0x10000000, since we know that it will just convert
4605 negative address space to positive, but perhaps this
4606 is not worthwhile. */
4609 /* These conditions are similar to SYMBOL_REF ones, just the
4610 constraints for code models differ. */
4611 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4612 && GET_CODE (op2) == CONST_INT
4613 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4614 && (trunc_int_for_mode (INTVAL (op2), SImode)
4628 /* Value should be nonzero if functions must have frame pointers.
4629 Zero means the frame pointer need not be set up (and parms may
4630 be accessed via the stack pointer) in functions that seem suitable. */
4633 ix86_frame_pointer_required ()
4635 /* If we accessed previous frames, then the generated code expects
4636 to be able to access the saved ebp value in our frame. */
4637 if (cfun->machine->accesses_prev_frame)
4640 /* Several x86 os'es need a frame pointer for other reasons,
4641 usually pertaining to setjmp. */
4642 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4645 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4646 the frame pointer by default. Turn it back on now if we've not
4647 got a leaf function. */
4648 if (TARGET_OMIT_LEAF_FRAME_POINTER
4649 && (!current_function_is_leaf))
4652 if (current_function_profile)
4658 /* Record that the current function accesses previous call frames. */
4661 ix86_setup_frame_addresses ()
4663 cfun->machine->accesses_prev_frame = 1;
4666 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4667 # define USE_HIDDEN_LINKONCE 1
4669 # define USE_HIDDEN_LINKONCE 0
4672 static int pic_labels_used;
4674 /* Fills in the label name that should be used for a pc thunk for
4675 the given register. */
4678 get_pc_thunk_name (name, regno)
4682 if (USE_HIDDEN_LINKONCE)
4683 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4685 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4689 /* This function generates code for -fpic that loads %ebx with
4690 the return address of the caller and then returns. */
4693 ix86_asm_file_end (file)
4699 for (regno = 0; regno < 8; ++regno)
4703 if (! ((pic_labels_used >> regno) & 1))
4706 get_pc_thunk_name (name, regno);
4708 if (USE_HIDDEN_LINKONCE)
4712 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4714 TREE_PUBLIC (decl) = 1;
4715 TREE_STATIC (decl) = 1;
4716 DECL_ONE_ONLY (decl) = 1;
4718 (*targetm.asm_out.unique_section) (decl, 0);
4719 named_section (decl, NULL, 0);
4721 (*targetm.asm_out.globalize_label) (file, name);
4722 fputs ("\t.hidden\t", file);
4723 assemble_name (file, name);
4725 ASM_DECLARE_FUNCTION_NAME (file, name, decl);
4730 ASM_OUTPUT_LABEL (file, name);
4733 xops[0] = gen_rtx_REG (SImode, regno);
4734 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4735 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4736 output_asm_insn ("ret", xops);
4740 /* Emit code for the SET_GOT patterns. */
4743 output_set_got (dest)
4749 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4751 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4753 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4756 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4758 output_asm_insn ("call\t%a2", xops);
4761 /* Output the "canonical" label name ("Lxx$pb") here too. This
4762 is what will be referred to by the Mach-O PIC subsystem. */
4763 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4765 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4766 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4769 output_asm_insn ("pop{l}\t%0", xops);
4774 get_pc_thunk_name (name, REGNO (dest));
4775 pic_labels_used |= 1 << REGNO (dest);
4777 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4778 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4779 output_asm_insn ("call\t%X2", xops);
4782 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4783 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4784 else if (!TARGET_MACHO)
4785 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4790 /* Generate an "push" pattern for input ARG. */
4796 return gen_rtx_SET (VOIDmode,
4798 gen_rtx_PRE_DEC (Pmode,
4799 stack_pointer_rtx)),
4803 /* Return >= 0 if there is an unused call-clobbered register available
4804 for the entire function. */
4807 ix86_select_alt_pic_regnum ()
4809 if (current_function_is_leaf && !current_function_profile)
4812 for (i = 2; i >= 0; --i)
4813 if (!regs_ever_live[i])
4817 return INVALID_REGNUM;
4820 /* Return 1 if we need to save REGNO. */
4822 ix86_save_reg (regno, maybe_eh_return)
4824 int maybe_eh_return;
4826 if (pic_offset_table_rtx
4827 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4828 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4829 || current_function_profile
4830 || current_function_calls_eh_return
4831 || current_function_uses_const_pool))
4833 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4838 if (current_function_calls_eh_return && maybe_eh_return)
4843 unsigned test = EH_RETURN_DATA_REGNO (i);
4844 if (test == INVALID_REGNUM)
4851 return (regs_ever_live[regno]
4852 && !call_used_regs[regno]
4853 && !fixed_regs[regno]
4854 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4857 /* Return number of registers to be saved on the stack. */
4865 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4866 if (ix86_save_reg (regno, true))
4871 /* Return the offset between two registers, one to be eliminated, and the other
4872 its replacement, at the start of a routine. */
4875 ix86_initial_elimination_offset (from, to)
4879 struct ix86_frame frame;
4880 ix86_compute_frame_layout (&frame);
4882 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4883 return frame.hard_frame_pointer_offset;
4884 else if (from == FRAME_POINTER_REGNUM
4885 && to == HARD_FRAME_POINTER_REGNUM)
4886 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4889 if (to != STACK_POINTER_REGNUM)
4891 else if (from == ARG_POINTER_REGNUM)
4892 return frame.stack_pointer_offset;
4893 else if (from != FRAME_POINTER_REGNUM)
4896 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4900 /* Fill structure ix86_frame about frame of currently computed function. */
4903 ix86_compute_frame_layout (frame)
4904 struct ix86_frame *frame;
4906 HOST_WIDE_INT total_size;
4907 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4909 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4910 HOST_WIDE_INT size = get_frame_size ();
4912 frame->nregs = ix86_nsaved_regs ();
4915 /* Skip return address and saved base pointer. */
4916 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4918 frame->hard_frame_pointer_offset = offset;
4920 /* Do some sanity checking of stack_alignment_needed and
4921 preferred_alignment, since i386 port is the only using those features
4922 that may break easily. */
4924 if (size && !stack_alignment_needed)
4926 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4928 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4930 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4933 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4934 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4936 /* Register save area */
4937 offset += frame->nregs * UNITS_PER_WORD;
4940 if (ix86_save_varrargs_registers)
4942 offset += X86_64_VARARGS_SIZE;
4943 frame->va_arg_size = X86_64_VARARGS_SIZE;
4946 frame->va_arg_size = 0;
4948 /* Align start of frame for local function. */
4949 frame->padding1 = ((offset + stack_alignment_needed - 1)
4950 & -stack_alignment_needed) - offset;
4952 offset += frame->padding1;
4954 /* Frame pointer points here. */
4955 frame->frame_pointer_offset = offset;
4959 /* Add outgoing arguments area. Can be skipped if we eliminated
4960 all the function calls as dead code. */
4961 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4963 offset += current_function_outgoing_args_size;
4964 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4967 frame->outgoing_arguments_size = 0;
4969 /* Align stack boundary. Only needed if we're calling another function
4971 if (!current_function_is_leaf || current_function_calls_alloca)
4972 frame->padding2 = ((offset + preferred_alignment - 1)
4973 & -preferred_alignment) - offset;
4975 frame->padding2 = 0;
4977 offset += frame->padding2;
4979 /* We've reached end of stack frame. */
4980 frame->stack_pointer_offset = offset;
4982 /* Size prologue needs to allocate. */
4983 frame->to_allocate =
4984 (size + frame->padding1 + frame->padding2
4985 + frame->outgoing_arguments_size + frame->va_arg_size);
4987 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4988 && current_function_is_leaf)
4990 frame->red_zone_size = frame->to_allocate;
4991 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4992 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4995 frame->red_zone_size = 0;
4996 frame->to_allocate -= frame->red_zone_size;
4997 frame->stack_pointer_offset -= frame->red_zone_size;
4999 fprintf (stderr, "nregs: %i\n", frame->nregs);
5000 fprintf (stderr, "size: %i\n", size);
5001 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5002 fprintf (stderr, "padding1: %i\n", frame->padding1);
5003 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5004 fprintf (stderr, "padding2: %i\n", frame->padding2);
5005 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5006 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5007 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5008 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5009 frame->hard_frame_pointer_offset);
5010 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5014 /* Emit code to save registers in the prologue. */
5017 ix86_emit_save_regs ()
5022 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5023 if (ix86_save_reg (regno, true))
5025 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5026 RTX_FRAME_RELATED_P (insn) = 1;
5030 /* Emit code to save registers using MOV insns. First register
5031 is restored from POINTER + OFFSET. */
5033 ix86_emit_save_regs_using_mov (pointer, offset)
5035 HOST_WIDE_INT offset;
5040 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5041 if (ix86_save_reg (regno, true))
5043 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5045 gen_rtx_REG (Pmode, regno));
5046 RTX_FRAME_RELATED_P (insn) = 1;
5047 offset += UNITS_PER_WORD;
5051 /* Expand the prologue into a bunch of separate insns. */
5054 ix86_expand_prologue ()
5058 struct ix86_frame frame;
5060 HOST_WIDE_INT allocate;
5062 ix86_compute_frame_layout (&frame);
5065 int count = frame.nregs;
5067 /* The fast prologue uses move instead of push to save registers. This
5068 is significantly longer, but also executes faster as modern hardware
5069 can execute the moves in parallel, but can't do that for push/pop.
5071 Be careful about choosing what prologue to emit: When function takes
5072 many instructions to execute we may use slow version as well as in
5073 case function is known to be outside hot spot (this is known with
5074 feedback only). Weight the size of function by number of registers
5075 to save as it is cheap to use one or two push instructions but very
5076 slow to use many of them. */
5078 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5079 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5080 || (flag_branch_probabilities
5081 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5082 use_fast_prologue_epilogue = 0;
5084 use_fast_prologue_epilogue = !expensive_function_p (count);
5085 if (TARGET_PROLOGUE_USING_MOVE)
5086 use_mov = use_fast_prologue_epilogue;
5089 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5090 slower on all targets. Also sdb doesn't like it. */
5092 if (frame_pointer_needed)
5094 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5095 RTX_FRAME_RELATED_P (insn) = 1;
5097 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5098 RTX_FRAME_RELATED_P (insn) = 1;
5101 allocate = frame.to_allocate;
5102 /* In case we are dealing only with single register and empty frame,
5103 push is equivalent of the mov+add sequence. */
5104 if (allocate == 0 && frame.nregs <= 1)
5108 ix86_emit_save_regs ();
5110 allocate += frame.nregs * UNITS_PER_WORD;
5114 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5116 insn = emit_insn (gen_pro_epilogue_adjust_stack
5117 (stack_pointer_rtx, stack_pointer_rtx,
5118 GEN_INT (-allocate)));
5119 RTX_FRAME_RELATED_P (insn) = 1;
5123 /* ??? Is this only valid for Win32? */
5130 arg0 = gen_rtx_REG (SImode, 0);
5131 emit_move_insn (arg0, GEN_INT (allocate));
5133 sym = gen_rtx_MEM (FUNCTION_MODE,
5134 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
5135 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
5137 CALL_INSN_FUNCTION_USAGE (insn)
5138 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
5139 CALL_INSN_FUNCTION_USAGE (insn));
5141 /* Don't allow scheduling pass to move insns across __alloca
5143 emit_insn (gen_blockage (const0_rtx));
5147 if (!frame_pointer_needed || !frame.to_allocate)
5148 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5150 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5151 -frame.nregs * UNITS_PER_WORD);
5154 #ifdef SUBTARGET_PROLOGUE
5158 pic_reg_used = false;
5159 if (pic_offset_table_rtx
5160 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5161 || current_function_profile))
5163 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5165 if (alt_pic_reg_used != INVALID_REGNUM)
5166 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5168 pic_reg_used = true;
5173 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5175 /* Even with accurate pre-reload life analysis, we can wind up
5176 deleting all references to the pic register after reload.
5177 Consider if cross-jumping unifies two sides of a branch
5178 controlled by a comparison vs the only read from a global.
5179 In which case, allow the set_got to be deleted, though we're
5180 too late to do anything about the ebx save in the prologue. */
5181 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5184 /* Prevent function calls from be scheduled before the call to mcount.
5185 In the pic_reg_used case, make sure that the got load isn't deleted. */
5186 if (current_function_profile)
5187 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5190 /* Emit code to restore saved registers using MOV insns. First register
5191 is restored from POINTER + OFFSET. */
5193 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
5196 int maybe_eh_return;
5200 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5201 if (ix86_save_reg (regno, maybe_eh_return))
5203 emit_move_insn (gen_rtx_REG (Pmode, regno),
5204 adjust_address (gen_rtx_MEM (Pmode, pointer),
5206 offset += UNITS_PER_WORD;
5210 /* Restore function stack, frame, and registers. */
5213 ix86_expand_epilogue (style)
5217 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5218 struct ix86_frame frame;
5219 HOST_WIDE_INT offset;
5221 ix86_compute_frame_layout (&frame);
5223 /* Calculate start of saved registers relative to ebp. Special care
5224 must be taken for the normal return case of a function using
5225 eh_return: the eax and edx registers are marked as saved, but not
5226 restored along this path. */
5227 offset = frame.nregs;
5228 if (current_function_calls_eh_return && style != 2)
5230 offset *= -UNITS_PER_WORD;
5232 /* If we're only restoring one register and sp is not valid then
5233 using a move instruction to restore the register since it's
5234 less work than reloading sp and popping the register.
5236 The default code result in stack adjustment using add/lea instruction,
5237 while this code results in LEAVE instruction (or discrete equivalent),
5238 so it is profitable in some other cases as well. Especially when there
5239 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5240 and there is exactly one register to pop. This heuristic may need some
5241 tuning in future. */
5242 if ((!sp_valid && frame.nregs <= 1)
5243 || (TARGET_EPILOGUE_USING_MOVE
5244 && use_fast_prologue_epilogue
5245 && (frame.nregs > 1 || frame.to_allocate))
5246 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5247 || (frame_pointer_needed && TARGET_USE_LEAVE
5248 && use_fast_prologue_epilogue && frame.nregs == 1)
5249 || current_function_calls_eh_return)
5251 /* Restore registers. We can use ebp or esp to address the memory
5252 locations. If both are available, default to ebp, since offsets
5253 are known to be small. Only exception is esp pointing directly to the
5254 end of block of saved registers, where we may simplify addressing
5257 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5258 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5259 frame.to_allocate, style == 2);
5261 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5262 offset, style == 2);
5264 /* eh_return epilogues need %ecx added to the stack pointer. */
5267 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5269 if (frame_pointer_needed)
5271 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5272 tmp = plus_constant (tmp, UNITS_PER_WORD);
5273 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5275 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5276 emit_move_insn (hard_frame_pointer_rtx, tmp);
5278 emit_insn (gen_pro_epilogue_adjust_stack
5279 (stack_pointer_rtx, sa, const0_rtx));
5283 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5284 tmp = plus_constant (tmp, (frame.to_allocate
5285 + frame.nregs * UNITS_PER_WORD));
5286 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5289 else if (!frame_pointer_needed)
5290 emit_insn (gen_pro_epilogue_adjust_stack
5291 (stack_pointer_rtx, stack_pointer_rtx,
5292 GEN_INT (frame.to_allocate
5293 + frame.nregs * UNITS_PER_WORD)));
5294 /* If not an i386, mov & pop is faster than "leave". */
5295 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
5296 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5299 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
5300 hard_frame_pointer_rtx,
5303 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5305 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5310 /* First step is to deallocate the stack frame so that we can
5311 pop the registers. */
5314 if (!frame_pointer_needed)
5316 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
5317 hard_frame_pointer_rtx,
5320 else if (frame.to_allocate)
5321 emit_insn (gen_pro_epilogue_adjust_stack
5322 (stack_pointer_rtx, stack_pointer_rtx,
5323 GEN_INT (frame.to_allocate)));
5325 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5326 if (ix86_save_reg (regno, false))
5329 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5331 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5333 if (frame_pointer_needed)
5335 /* Leave results in shorter dependency chains on CPUs that are
5336 able to grok it fast. */
5337 if (TARGET_USE_LEAVE)
5338 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5339 else if (TARGET_64BIT)
5340 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5342 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5346 /* Sibcall epilogues don't want a return instruction. */
5350 if (current_function_pops_args && current_function_args_size)
5352 rtx popc = GEN_INT (current_function_pops_args);
5354 /* i386 can only pop 64K bytes. If asked to pop more, pop
5355 return address, do explicit add, and jump indirectly to the
5358 if (current_function_pops_args >= 65536)
5360 rtx ecx = gen_rtx_REG (SImode, 2);
5362 /* There are is no "pascal" calling convention in 64bit ABI. */
5366 emit_insn (gen_popsi1 (ecx));
5367 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5368 emit_jump_insn (gen_return_indirect_internal (ecx));
5371 emit_jump_insn (gen_return_pop_internal (popc));
5374 emit_jump_insn (gen_return_internal ());
5377 /* Reset from the function's potential modifications. */
5380 ix86_output_function_epilogue (file, size)
5381 FILE *file ATTRIBUTE_UNUSED;
5382 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
5384 if (pic_offset_table_rtx)
5385 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5388 /* Extract the parts of an RTL expression that is a valid memory address
5389 for an instruction. Return 0 if the structure of the address is
5390 grossly off. Return -1 if the address contains ASHIFT, so it is not
5391 strictly valid, but still used for computing length of lea instruction.
5395 ix86_decompose_address (addr, out)
5397 struct ix86_address *out;
5399 rtx base = NULL_RTX;
5400 rtx index = NULL_RTX;
5401 rtx disp = NULL_RTX;
5402 HOST_WIDE_INT scale = 1;
5403 rtx scale_rtx = NULL_RTX;
5406 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
5408 else if (GET_CODE (addr) == PLUS)
5410 rtx op0 = XEXP (addr, 0);
5411 rtx op1 = XEXP (addr, 1);
5412 enum rtx_code code0 = GET_CODE (op0);
5413 enum rtx_code code1 = GET_CODE (op1);
5415 if (code0 == REG || code0 == SUBREG)
5417 if (code1 == REG || code1 == SUBREG)
5418 index = op0, base = op1; /* index + base */
5420 base = op0, disp = op1; /* base + displacement */
5422 else if (code0 == MULT)
5424 index = XEXP (op0, 0);
5425 scale_rtx = XEXP (op0, 1);
5426 if (code1 == REG || code1 == SUBREG)
5427 base = op1; /* index*scale + base */
5429 disp = op1; /* index*scale + disp */
5431 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
5433 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
5434 scale_rtx = XEXP (XEXP (op0, 0), 1);
5435 base = XEXP (op0, 1);
5438 else if (code0 == PLUS)
5440 index = XEXP (op0, 0); /* index + base + disp */
5441 base = XEXP (op0, 1);
5447 else if (GET_CODE (addr) == MULT)
5449 index = XEXP (addr, 0); /* index*scale */
5450 scale_rtx = XEXP (addr, 1);
5452 else if (GET_CODE (addr) == ASHIFT)
5456 /* We're called for lea too, which implements ashift on occasion. */
5457 index = XEXP (addr, 0);
5458 tmp = XEXP (addr, 1);
5459 if (GET_CODE (tmp) != CONST_INT)
5461 scale = INTVAL (tmp);
5462 if ((unsigned HOST_WIDE_INT) scale > 3)
5468 disp = addr; /* displacement */
5470 /* Extract the integral value of scale. */
5473 if (GET_CODE (scale_rtx) != CONST_INT)
5475 scale = INTVAL (scale_rtx);
5478 /* Allow arg pointer and stack pointer as index if there is not scaling */
5479 if (base && index && scale == 1
5480 && (index == arg_pointer_rtx || index == frame_pointer_rtx
5481 || index == stack_pointer_rtx))
5488 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5489 if ((base == hard_frame_pointer_rtx
5490 || base == frame_pointer_rtx
5491 || base == arg_pointer_rtx) && !disp)
5494 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5495 Avoid this by transforming to [%esi+0]. */
5496 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5497 && base && !index && !disp
5499 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5502 /* Special case: encode reg+reg instead of reg*2. */
5503 if (!base && index && scale && scale == 2)
5504 base = index, scale = 1;
5506 /* Special case: scaling cannot be encoded without base or displacement. */
5507 if (!base && !disp && index && scale != 1)
5518 /* Return cost of the memory address x.
5519 For i386, it is better to use a complex address than let gcc copy
5520 the address into a reg and make a new pseudo. But not if the address
5521 requires to two regs - that would mean more pseudos with longer
5524 ix86_address_cost (x)
5527 struct ix86_address parts;
5530 if (!ix86_decompose_address (x, &parts))
5533 if (parts.base && GET_CODE (parts.base) == SUBREG)
5534 parts.base = SUBREG_REG (parts.base);
5535 if (parts.index && GET_CODE (parts.index) == SUBREG)
5536 parts.index = SUBREG_REG (parts.index);
5538 /* More complex memory references are better. */
5539 if (parts.disp && parts.disp != const0_rtx)
5542 /* Attempt to minimize number of registers in the address. */
5544 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5546 && (!REG_P (parts.index)
5547 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5551 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5553 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5554 && parts.base != parts.index)
5557 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
5558 since it's predecode logic can't detect the length of instructions
5559 and it degenerates to vector decoded. Increase cost of such
5560 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5561 to split such addresses or even refuse such addresses at all.
5563 Following addressing modes are affected:
5568 The first and last case may be avoidable by explicitly coding the zero in
5569 memory address, but I don't have AMD-K6 machine handy to check this
5573 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5574 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5575 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5581 /* If X is a machine specific address (i.e. a symbol or label being
5582 referenced as a displacement from the GOT implemented using an
5583 UNSPEC), then return the base term. Otherwise return X. */
5586 ix86_find_base_term (x)
5593 if (GET_CODE (x) != CONST)
5596 if (GET_CODE (term) == PLUS
5597 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5598 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5599 term = XEXP (term, 0);
5600 if (GET_CODE (term) != UNSPEC
5601 || XINT (term, 1) != UNSPEC_GOTPCREL)
5604 term = XVECEXP (term, 0, 0);
5606 if (GET_CODE (term) != SYMBOL_REF
5607 && GET_CODE (term) != LABEL_REF)
5613 term = ix86_delegitimize_address (x);
5615 if (GET_CODE (term) != SYMBOL_REF
5616 && GET_CODE (term) != LABEL_REF)
5622 /* Determine if a given RTX is a valid constant. We already know this
5623 satisfies CONSTANT_P. */
5626 legitimate_constant_p (x)
5631 switch (GET_CODE (x))
5634 /* TLS symbols are not constant. */
5635 if (tls_symbolic_operand (x, Pmode))
5640 inner = XEXP (x, 0);
5642 /* Offsets of TLS symbols are never valid.
5643 Discourage CSE from creating them. */
5644 if (GET_CODE (inner) == PLUS
5645 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5648 /* Only some unspecs are valid as "constants". */
5649 if (GET_CODE (inner) == UNSPEC)
5650 switch (XINT (inner, 1))
5653 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5663 /* Otherwise we handle everything else in the move patterns. */
5667 /* Determine if it's legal to put X into the constant pool. This
5668 is not possible for the address of thread-local symbols, which
5669 is checked above. */
5672 ix86_cannot_force_const_mem (x)
5675 return !legitimate_constant_p (x);
5678 /* Determine if a given RTX is a valid constant address. */
5681 constant_address_p (x)
5684 switch (GET_CODE (x))
5691 return TARGET_64BIT;
5694 /* For Mach-O, really believe the CONST. */
5697 /* Otherwise fall through. */
5699 return !flag_pic && legitimate_constant_p (x);
5706 /* Nonzero if the constant value X is a legitimate general operand
5707 when generating PIC code. It is given that flag_pic is on and
5708 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5711 legitimate_pic_operand_p (x)
5716 switch (GET_CODE (x))
5719 inner = XEXP (x, 0);
5721 /* Only some unspecs are valid as "constants". */
5722 if (GET_CODE (inner) == UNSPEC)
5723 switch (XINT (inner, 1))
5726 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5734 return legitimate_pic_address_disp_p (x);
5741 /* Determine if a given CONST RTX is a valid memory displacement
5745 legitimate_pic_address_disp_p (disp)
5750 /* In 64bit mode we can allow direct addresses of symbols and labels
5751 when they are not dynamic symbols. */
5754 /* TLS references should always be enclosed in UNSPEC. */
5755 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5757 if (GET_CODE (disp) == SYMBOL_REF
5758 && ix86_cmodel == CM_SMALL_PIC
5759 && (CONSTANT_POOL_ADDRESS_P (disp)
5760 || SYMBOL_REF_FLAG (disp)))
5762 if (GET_CODE (disp) == LABEL_REF)
5764 if (GET_CODE (disp) == CONST
5765 && GET_CODE (XEXP (disp, 0)) == PLUS
5766 && ((GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
5767 && ix86_cmodel == CM_SMALL_PIC
5768 && (CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (disp, 0), 0))
5769 || SYMBOL_REF_FLAG (XEXP (XEXP (disp, 0), 0))))
5770 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
5771 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT
5772 && INTVAL (XEXP (XEXP (disp, 0), 1)) < 16*1024*1024
5773 && INTVAL (XEXP (XEXP (disp, 0), 1)) >= -16*1024*1024)
5776 if (GET_CODE (disp) != CONST)
5778 disp = XEXP (disp, 0);
5782 /* We are unsafe to allow PLUS expressions. This limit allowed distance
5783 of GOT tables. We should not need these anyway. */
5784 if (GET_CODE (disp) != UNSPEC
5785 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5788 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5789 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5795 if (GET_CODE (disp) == PLUS)
5797 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5799 disp = XEXP (disp, 0);
5803 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5804 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5806 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5807 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5808 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5810 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5811 if (strstr (sym_name, "$pb") != 0)
5816 if (GET_CODE (disp) != UNSPEC)
5819 switch (XINT (disp, 1))
5824 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5826 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5827 case UNSPEC_GOTTPOFF:
5828 case UNSPEC_GOTNTPOFF:
5829 case UNSPEC_INDNTPOFF:
5832 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5834 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5836 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5842 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5843 memory address for an instruction. The MODE argument is the machine mode
5844 for the MEM expression that wants to use this address.
5846 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
5847 convert common non-canonical forms to canonical form so that they will
5851 legitimate_address_p (mode, addr, strict)
5852 enum machine_mode mode;
5856 struct ix86_address parts;
5857 rtx base, index, disp;
5858 HOST_WIDE_INT scale;
5859 const char *reason = NULL;
5860 rtx reason_rtx = NULL_RTX;
5862 if (TARGET_DEBUG_ADDR)
5865 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5866 GET_MODE_NAME (mode), strict);
5870 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
5872 if (TARGET_DEBUG_ADDR)
5873 fprintf (stderr, "Success.\n");
5877 if (ix86_decompose_address (addr, &parts) <= 0)
5879 reason = "decomposition failed";
5884 index = parts.index;
5886 scale = parts.scale;
5888 /* Validate base register.
5890 Don't allow SUBREG's here, it can lead to spill failures when the base
5891 is one word out of a two word structure, which is represented internally
5899 if (GET_CODE (base) == SUBREG)
5900 reg = SUBREG_REG (base);
5904 if (GET_CODE (reg) != REG)
5906 reason = "base is not a register";
5910 if (GET_MODE (base) != Pmode)
5912 reason = "base is not in Pmode";
5916 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5917 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5919 reason = "base is not valid";
5924 /* Validate index register.
5926 Don't allow SUBREG's here, it can lead to spill failures when the index
5927 is one word out of a two word structure, which is represented internally
5935 if (GET_CODE (index) == SUBREG)
5936 reg = SUBREG_REG (index);
5940 if (GET_CODE (reg) != REG)
5942 reason = "index is not a register";
5946 if (GET_MODE (index) != Pmode)
5948 reason = "index is not in Pmode";
5952 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5953 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5955 reason = "index is not valid";
5960 /* Validate scale factor. */
5963 reason_rtx = GEN_INT (scale);
5966 reason = "scale without index";
5970 if (scale != 2 && scale != 4 && scale != 8)
5972 reason = "scale is not a valid multiplier";
5977 /* Validate displacement. */
5982 if (GET_CODE (disp) == CONST
5983 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5984 switch (XINT (XEXP (disp, 0), 1))
5988 case UNSPEC_GOTPCREL:
5991 goto is_legitimate_pic;
5993 case UNSPEC_GOTTPOFF:
5994 case UNSPEC_GOTNTPOFF:
5995 case UNSPEC_INDNTPOFF:
6001 reason = "invalid address unspec";
6005 else if (flag_pic && (SYMBOLIC_CONST (disp)
6007 && !machopic_operand_p (disp)
6012 if (TARGET_64BIT && (index || base))
6014 /* foo@dtpoff(%rX) is ok. */
6015 if (GET_CODE (disp) != CONST
6016 || GET_CODE (XEXP (disp, 0)) != PLUS
6017 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6018 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6019 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6020 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6022 reason = "non-constant pic memory reference";
6026 else if (! legitimate_pic_address_disp_p (disp))
6028 reason = "displacement is an invalid pic construct";
6032 /* This code used to verify that a symbolic pic displacement
6033 includes the pic_offset_table_rtx register.
6035 While this is good idea, unfortunately these constructs may
6036 be created by "adds using lea" optimization for incorrect
6045 This code is nonsensical, but results in addressing
6046 GOT table with pic_offset_table_rtx base. We can't
6047 just refuse it easily, since it gets matched by
6048 "addsi3" pattern, that later gets split to lea in the
6049 case output register differs from input. While this
6050 can be handled by separate addsi pattern for this case
6051 that never results in lea, this seems to be easier and
6052 correct fix for crash to disable this test. */
6054 else if (!CONSTANT_ADDRESS_P (disp))
6056 reason = "displacement is not constant";
6059 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
6061 reason = "displacement is out of range";
6064 else if (!TARGET_64BIT && GET_CODE (disp) == CONST_DOUBLE)
6066 reason = "displacement is a const_double";
6071 /* Everything looks valid. */
6072 if (TARGET_DEBUG_ADDR)
6073 fprintf (stderr, "Success.\n");
6077 if (TARGET_DEBUG_ADDR)
6079 fprintf (stderr, "Error: %s\n", reason);
6080 debug_rtx (reason_rtx);
6085 /* Return an unique alias set for the GOT. */
6087 static HOST_WIDE_INT
6088 ix86_GOT_alias_set ()
6090 static HOST_WIDE_INT set = -1;
6092 set = new_alias_set ();
6096 /* Return a legitimate reference for ORIG (an address) using the
6097 register REG. If REG is 0, a new pseudo is generated.
6099 There are two types of references that must be handled:
6101 1. Global data references must load the address from the GOT, via
6102 the PIC reg. An insn is emitted to do this load, and the reg is
6105 2. Static data references, constant pool addresses, and code labels
6106 compute the address as an offset from the GOT, whose base is in
6107 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
6108 differentiate them from global data objects. The returned
6109 address is the PIC reg + an unspec constant.
6111 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6112 reg also appears in the address. */
6115 legitimize_pic_address (orig, reg)
6125 reg = gen_reg_rtx (Pmode);
6126 /* Use the generic Mach-O PIC machinery. */
6127 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6130 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6132 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6134 /* This symbol may be referenced via a displacement from the PIC
6135 base address (@GOTOFF). */
6137 if (reload_in_progress)
6138 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6139 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6140 new = gen_rtx_CONST (Pmode, new);
6141 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6145 emit_move_insn (reg, new);
6149 else if (GET_CODE (addr) == SYMBOL_REF)
6153 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6154 new = gen_rtx_CONST (Pmode, new);
6155 new = gen_rtx_MEM (Pmode, new);
6156 RTX_UNCHANGING_P (new) = 1;
6157 set_mem_alias_set (new, ix86_GOT_alias_set ());
6160 reg = gen_reg_rtx (Pmode);
6161 /* Use directly gen_movsi, otherwise the address is loaded
6162 into register for CSE. We don't want to CSE this addresses,
6163 instead we CSE addresses from the GOT table, so skip this. */
6164 emit_insn (gen_movsi (reg, new));
6169 /* This symbol must be referenced via a load from the
6170 Global Offset Table (@GOT). */
6172 if (reload_in_progress)
6173 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6174 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6175 new = gen_rtx_CONST (Pmode, new);
6176 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6177 new = gen_rtx_MEM (Pmode, new);
6178 RTX_UNCHANGING_P (new) = 1;
6179 set_mem_alias_set (new, ix86_GOT_alias_set ());
6182 reg = gen_reg_rtx (Pmode);
6183 emit_move_insn (reg, new);
6189 if (GET_CODE (addr) == CONST)
6191 addr = XEXP (addr, 0);
6193 /* We must match stuff we generate before. Assume the only
6194 unspecs that can get here are ours. Not that we could do
6195 anything with them anyway... */
6196 if (GET_CODE (addr) == UNSPEC
6197 || (GET_CODE (addr) == PLUS
6198 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6200 if (GET_CODE (addr) != PLUS)
6203 if (GET_CODE (addr) == PLUS)
6205 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6207 /* Check first to see if this is a constant offset from a @GOTOFF
6208 symbol reference. */
6209 if (local_symbolic_operand (op0, Pmode)
6210 && GET_CODE (op1) == CONST_INT)
6214 if (reload_in_progress)
6215 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6216 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6218 new = gen_rtx_PLUS (Pmode, new, op1);
6219 new = gen_rtx_CONST (Pmode, new);
6220 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6224 emit_move_insn (reg, new);
6230 if (INTVAL (op1) < -16*1024*1024
6231 || INTVAL (op1) >= 16*1024*1024)
6232 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
6237 base = legitimize_pic_address (XEXP (addr, 0), reg);
6238 new = legitimize_pic_address (XEXP (addr, 1),
6239 base == reg ? NULL_RTX : reg);
6241 if (GET_CODE (new) == CONST_INT)
6242 new = plus_constant (base, INTVAL (new));
6245 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6247 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6248 new = XEXP (new, 1);
6250 new = gen_rtx_PLUS (Pmode, base, new);
6259 ix86_encode_section_info (decl, first)
6261 int first ATTRIBUTE_UNUSED;
6263 bool local_p = (*targetm.binds_local_p) (decl);
6266 rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
6267 if (GET_CODE (rtl) != MEM)
6269 symbol = XEXP (rtl, 0);
6270 if (GET_CODE (symbol) != SYMBOL_REF)
6273 /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
6274 symbol so that we may access it directly in the GOT. */
6277 SYMBOL_REF_FLAG (symbol) = local_p;
6279 /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
6280 "local dynamic", "initial exec" or "local exec" TLS models
6283 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
6285 const char *symbol_str;
6288 enum tls_model kind = decl_tls_model (decl);
6290 if (TARGET_64BIT && ! flag_pic)
6292 /* x86-64 doesn't allow non-pic code for shared libraries,
6293 so don't generate GD/LD TLS models for non-pic code. */
6296 case TLS_MODEL_GLOBAL_DYNAMIC:
6297 kind = TLS_MODEL_INITIAL_EXEC; break;
6298 case TLS_MODEL_LOCAL_DYNAMIC:
6299 kind = TLS_MODEL_LOCAL_EXEC; break;
6305 symbol_str = XSTR (symbol, 0);
6307 if (symbol_str[0] == '%')
6309 if (symbol_str[1] == tls_model_chars[kind])
6313 len = strlen (symbol_str) + 1;
6314 newstr = alloca (len + 2);
6317 newstr[1] = tls_model_chars[kind];
6318 memcpy (newstr + 2, symbol_str, len);
6320 XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
6324 /* Undo the above when printing symbol names. */
6327 ix86_strip_name_encoding (str)
6337 /* Load the thread pointer into a register. */
6340 get_thread_pointer ()
6344 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6345 tp = gen_rtx_MEM (Pmode, tp);
6346 RTX_UNCHANGING_P (tp) = 1;
6347 set_mem_alias_set (tp, ix86_GOT_alias_set ());
6348 tp = force_reg (Pmode, tp);
6353 /* Try machine-dependent ways of modifying an illegitimate address
6354 to be legitimate. If we find one, return the new, valid address.
6355 This macro is used in only one place: `memory_address' in explow.c.
6357 OLDX is the address as it was before break_out_memory_refs was called.
6358 In some cases it is useful to look at this to decide what needs to be done.
6360 MODE and WIN are passed so that this macro can use
6361 GO_IF_LEGITIMATE_ADDRESS.
6363 It is always safe for this macro to do nothing. It exists to recognize
6364 opportunities to optimize the output.
6366 For the 80386, we handle X+REG by loading X into a register R and
6367 using R+REG. R will go in a general reg and indexing will be used.
6368 However, if REG is a broken-out memory address or multiplication,
6369 nothing needs to be done because REG can certainly go in a general reg.
6371 When -fpic is used, special handling is needed for symbolic references.
6372 See comments by legitimize_pic_address in i386.c for details. */
6375 legitimize_address (x, oldx, mode)
6377 register rtx oldx ATTRIBUTE_UNUSED;
6378 enum machine_mode mode;
6383 if (TARGET_DEBUG_ADDR)
6385 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6386 GET_MODE_NAME (mode));
6390 log = tls_symbolic_operand (x, mode);
6393 rtx dest, base, off, pic;
6398 case TLS_MODEL_GLOBAL_DYNAMIC:
6399 dest = gen_reg_rtx (Pmode);
6402 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6405 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6406 insns = get_insns ();
6409 emit_libcall_block (insns, dest, rax, x);
6412 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6415 case TLS_MODEL_LOCAL_DYNAMIC:
6416 base = gen_reg_rtx (Pmode);
6419 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6422 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6423 insns = get_insns ();
6426 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6427 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6428 emit_libcall_block (insns, base, rax, note);
6431 emit_insn (gen_tls_local_dynamic_base_32 (base));
6433 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6434 off = gen_rtx_CONST (Pmode, off);
6436 return gen_rtx_PLUS (Pmode, base, off);
6438 case TLS_MODEL_INITIAL_EXEC:
6442 type = UNSPEC_GOTNTPOFF;
6446 if (reload_in_progress)
6447 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6448 pic = pic_offset_table_rtx;
6449 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6451 else if (!TARGET_GNU_TLS)
6453 pic = gen_reg_rtx (Pmode);
6454 emit_insn (gen_set_got (pic));
6455 type = UNSPEC_GOTTPOFF;
6460 type = UNSPEC_INDNTPOFF;
6463 base = get_thread_pointer ();
6465 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6466 off = gen_rtx_CONST (Pmode, off);
6468 off = gen_rtx_PLUS (Pmode, pic, off);
6469 off = gen_rtx_MEM (Pmode, off);
6470 RTX_UNCHANGING_P (off) = 1;
6471 set_mem_alias_set (off, ix86_GOT_alias_set ());
6472 dest = gen_reg_rtx (Pmode);
6474 if (TARGET_64BIT || TARGET_GNU_TLS)
6476 emit_move_insn (dest, off);
6477 return gen_rtx_PLUS (Pmode, base, dest);
6480 emit_insn (gen_subsi3 (dest, base, off));
6483 case TLS_MODEL_LOCAL_EXEC:
6484 base = get_thread_pointer ();
6486 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6487 (TARGET_64BIT || TARGET_GNU_TLS)
6488 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6489 off = gen_rtx_CONST (Pmode, off);
6491 if (TARGET_64BIT || TARGET_GNU_TLS)
6492 return gen_rtx_PLUS (Pmode, base, off);
6495 dest = gen_reg_rtx (Pmode);
6496 emit_insn (gen_subsi3 (dest, base, off));
6507 if (flag_pic && SYMBOLIC_CONST (x))
6508 return legitimize_pic_address (x, 0);
6510 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6511 if (GET_CODE (x) == ASHIFT
6512 && GET_CODE (XEXP (x, 1)) == CONST_INT
6513 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6516 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6517 GEN_INT (1 << log));
6520 if (GET_CODE (x) == PLUS)
6522 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6524 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6525 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6526 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6529 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6530 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6531 GEN_INT (1 << log));
6534 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6535 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6536 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6539 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6540 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6541 GEN_INT (1 << log));
6544 /* Put multiply first if it isn't already. */
6545 if (GET_CODE (XEXP (x, 1)) == MULT)
6547 rtx tmp = XEXP (x, 0);
6548 XEXP (x, 0) = XEXP (x, 1);
6553 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6554 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6555 created by virtual register instantiation, register elimination, and
6556 similar optimizations. */
6557 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6560 x = gen_rtx_PLUS (Pmode,
6561 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6562 XEXP (XEXP (x, 1), 0)),
6563 XEXP (XEXP (x, 1), 1));
6567 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6568 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6569 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6570 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6571 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6572 && CONSTANT_P (XEXP (x, 1)))
6575 rtx other = NULL_RTX;
6577 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6579 constant = XEXP (x, 1);
6580 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6582 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6584 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6585 other = XEXP (x, 1);
6593 x = gen_rtx_PLUS (Pmode,
6594 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6595 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6596 plus_constant (other, INTVAL (constant)));
6600 if (changed && legitimate_address_p (mode, x, FALSE))
6603 if (GET_CODE (XEXP (x, 0)) == MULT)
6606 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6609 if (GET_CODE (XEXP (x, 1)) == MULT)
6612 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6616 && GET_CODE (XEXP (x, 1)) == REG
6617 && GET_CODE (XEXP (x, 0)) == REG)
6620 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6623 x = legitimize_pic_address (x, 0);
6626 if (changed && legitimate_address_p (mode, x, FALSE))
6629 if (GET_CODE (XEXP (x, 0)) == REG)
6631 register rtx temp = gen_reg_rtx (Pmode);
6632 register rtx val = force_operand (XEXP (x, 1), temp);
6634 emit_move_insn (temp, val);
6640 else if (GET_CODE (XEXP (x, 1)) == REG)
6642 register rtx temp = gen_reg_rtx (Pmode);
6643 register rtx val = force_operand (XEXP (x, 0), temp);
6645 emit_move_insn (temp, val);
6655 /* Print an integer constant expression in assembler syntax. Addition
6656 and subtraction are the only arithmetic that may appear in these
6657 expressions. FILE is the stdio stream to write to, X is the rtx, and
6658 CODE is the operand print code from the output string. */
6661 output_pic_addr_const (file, x, code)
6668 switch (GET_CODE (x))
6678 assemble_name (file, XSTR (x, 0));
6679 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_FLAG (x))
6680 fputs ("@PLT", file);
6687 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6688 assemble_name (asm_out_file, buf);
6692 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6696 /* This used to output parentheses around the expression,
6697 but that does not work on the 386 (either ATT or BSD assembler). */
6698 output_pic_addr_const (file, XEXP (x, 0), code);
6702 if (GET_MODE (x) == VOIDmode)
6704 /* We can use %d if the number is <32 bits and positive. */
6705 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6706 fprintf (file, "0x%lx%08lx",
6707 (unsigned long) CONST_DOUBLE_HIGH (x),
6708 (unsigned long) CONST_DOUBLE_LOW (x));
6710 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6713 /* We can't handle floating point constants;
6714 PRINT_OPERAND must handle them. */
6715 output_operand_lossage ("floating constant misused");
6719 /* Some assemblers need integer constants to appear first. */
6720 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6722 output_pic_addr_const (file, XEXP (x, 0), code);
6724 output_pic_addr_const (file, XEXP (x, 1), code);
6726 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6728 output_pic_addr_const (file, XEXP (x, 1), code);
6730 output_pic_addr_const (file, XEXP (x, 0), code);
6738 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6739 output_pic_addr_const (file, XEXP (x, 0), code);
6741 output_pic_addr_const (file, XEXP (x, 1), code);
6743 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6747 if (XVECLEN (x, 0) != 1)
6749 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6750 switch (XINT (x, 1))
6753 fputs ("@GOT", file);
6756 fputs ("@GOTOFF", file);
6758 case UNSPEC_GOTPCREL:
6759 fputs ("@GOTPCREL(%rip)", file);
6761 case UNSPEC_GOTTPOFF:
6762 /* FIXME: This might be @TPOFF in Sun ld too. */
6763 fputs ("@GOTTPOFF", file);
6766 fputs ("@TPOFF", file);
6770 fputs ("@TPOFF", file);
6772 fputs ("@NTPOFF", file);
6775 fputs ("@DTPOFF", file);
6777 case UNSPEC_GOTNTPOFF:
6779 fputs ("@GOTTPOFF(%rip)", file);
6781 fputs ("@GOTNTPOFF", file);
6783 case UNSPEC_INDNTPOFF:
6784 fputs ("@INDNTPOFF", file);
6787 output_operand_lossage ("invalid UNSPEC as operand");
6793 output_operand_lossage ("invalid expression as operand");
6797 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6798 We need to handle our special PIC relocations. */
6801 i386_dwarf_output_addr_const (file, x)
6806 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6810 fprintf (file, "%s", ASM_LONG);
6813 output_pic_addr_const (file, x, '\0');
6815 output_addr_const (file, x);
6819 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6820 We need to emit DTP-relative relocations. */
6823 i386_output_dwarf_dtprel (file, size, x)
6828 fputs (ASM_LONG, file);
6829 output_addr_const (file, x);
6830 fputs ("@DTPOFF", file);
6836 fputs (", 0", file);
6843 /* In the name of slightly smaller debug output, and to cater to
6844 general assembler losage, recognize PIC+GOTOFF and turn it back
6845 into a direct symbol reference. */
6848 ix86_delegitimize_address (orig_x)
6853 if (GET_CODE (x) == MEM)
6858 if (GET_CODE (x) != CONST
6859 || GET_CODE (XEXP (x, 0)) != UNSPEC
6860 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6861 || GET_CODE (orig_x) != MEM)
6863 return XVECEXP (XEXP (x, 0), 0, 0);
6866 if (GET_CODE (x) != PLUS
6867 || GET_CODE (XEXP (x, 1)) != CONST)
6870 if (GET_CODE (XEXP (x, 0)) == REG
6871 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6872 /* %ebx + GOT/GOTOFF */
6874 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6876 /* %ebx + %reg * scale + GOT/GOTOFF */
6878 if (GET_CODE (XEXP (y, 0)) == REG
6879 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6881 else if (GET_CODE (XEXP (y, 1)) == REG
6882 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6886 if (GET_CODE (y) != REG
6887 && GET_CODE (y) != MULT
6888 && GET_CODE (y) != ASHIFT)
6894 x = XEXP (XEXP (x, 1), 0);
6895 if (GET_CODE (x) == UNSPEC
6896 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6897 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6900 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6901 return XVECEXP (x, 0, 0);
6904 if (GET_CODE (x) == PLUS
6905 && GET_CODE (XEXP (x, 0)) == UNSPEC
6906 && GET_CODE (XEXP (x, 1)) == CONST_INT
6907 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6908 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6909 && GET_CODE (orig_x) != MEM)))
6911 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6913 return gen_rtx_PLUS (Pmode, y, x);
6921 put_condition_code (code, mode, reverse, fp, file)
6923 enum machine_mode mode;
6929 if (mode == CCFPmode || mode == CCFPUmode)
6931 enum rtx_code second_code, bypass_code;
6932 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6933 if (bypass_code != NIL || second_code != NIL)
6935 code = ix86_fp_compare_code_to_integer (code);
6939 code = reverse_condition (code);
6950 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6955 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
6956 Those same assemblers have the same but opposite losage on cmov. */
6959 suffix = fp ? "nbe" : "a";
6962 if (mode == CCNOmode || mode == CCGOCmode)
6964 else if (mode == CCmode || mode == CCGCmode)
6975 if (mode == CCNOmode || mode == CCGOCmode)
6977 else if (mode == CCmode || mode == CCGCmode)
6986 suffix = fp ? "nb" : "ae";
6989 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6999 suffix = fp ? "u" : "p";
7002 suffix = fp ? "nu" : "np";
7007 fputs (suffix, file);
7011 print_reg (x, code, file)
7016 if (REGNO (x) == ARG_POINTER_REGNUM
7017 || REGNO (x) == FRAME_POINTER_REGNUM
7018 || REGNO (x) == FLAGS_REG
7019 || REGNO (x) == FPSR_REG)
7022 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7025 if (code == 'w' || MMX_REG_P (x))
7027 else if (code == 'b')
7029 else if (code == 'k')
7031 else if (code == 'q')
7033 else if (code == 'y')
7035 else if (code == 'h')
7038 code = GET_MODE_SIZE (GET_MODE (x));
7040 /* Irritatingly, AMD extended registers use different naming convention
7041 from the normal registers. */
7042 if (REX_INT_REG_P (x))
7049 error ("extended registers have no high halves");
7052 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7055 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7058 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7061 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7064 error ("unsupported operand size for extended register");
7072 if (STACK_TOP_P (x))
7074 fputs ("st(0)", file);
7081 if (! ANY_FP_REG_P (x))
7082 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7086 fputs (hi_reg_name[REGNO (x)], file);
7089 fputs (qi_reg_name[REGNO (x)], file);
7092 fputs (qi_high_reg_name[REGNO (x)], file);
7099 /* Locate some local-dynamic symbol still in use by this function
7100 so that we can print its name in some tls_local_dynamic_base
7104 get_some_local_dynamic_name ()
7108 if (cfun->machine->some_ld_name)
7109 return cfun->machine->some_ld_name;
7111 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7113 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7114 return cfun->machine->some_ld_name;
7120 get_some_local_dynamic_name_1 (px, data)
7122 void *data ATTRIBUTE_UNUSED;
7126 if (GET_CODE (x) == SYMBOL_REF
7127 && local_dynamic_symbolic_operand (x, Pmode))
7129 cfun->machine->some_ld_name = XSTR (x, 0);
7137 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7138 C -- print opcode suffix for set/cmov insn.
7139 c -- like C, but print reversed condition
7140 F,f -- likewise, but for floating-point.
7141 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
7143 R -- print the prefix for register names.
7144 z -- print the opcode suffix for the size of the current operand.
7145 * -- print a star (in certain assembler syntax)
7146 A -- print an absolute memory reference.
7147 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7148 s -- print a shift double count, followed by the assemblers argument
7150 b -- print the QImode name of the register for the indicated operand.
7151 %b0 would print %al if operands[0] is reg 0.
7152 w -- likewise, print the HImode name of the register.
7153 k -- likewise, print the SImode name of the register.
7154 q -- likewise, print the DImode name of the register.
7155 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7156 y -- print "st(0)" instead of "st" as a register.
7157 D -- print condition for SSE cmp instruction.
7158 P -- if PIC, print an @PLT suffix.
7159 X -- don't print any sort of PIC '@' suffix for a symbol.
7160 & -- print some in-use local-dynamic symbol name.
7164 print_operand (file, x, code)
7174 if (ASSEMBLER_DIALECT == ASM_ATT)
7179 assemble_name (file, get_some_local_dynamic_name ());
7183 if (ASSEMBLER_DIALECT == ASM_ATT)
7185 else if (ASSEMBLER_DIALECT == ASM_INTEL)
7187 /* Intel syntax. For absolute addresses, registers should not
7188 be surrounded by braces. */
7189 if (GET_CODE (x) != REG)
7192 PRINT_OPERAND (file, x, 0);
7200 PRINT_OPERAND (file, x, 0);
7205 if (ASSEMBLER_DIALECT == ASM_ATT)
7210 if (ASSEMBLER_DIALECT == ASM_ATT)
7215 if (ASSEMBLER_DIALECT == ASM_ATT)
7220 if (ASSEMBLER_DIALECT == ASM_ATT)
7225 if (ASSEMBLER_DIALECT == ASM_ATT)
7230 if (ASSEMBLER_DIALECT == ASM_ATT)
7235 /* 387 opcodes don't get size suffixes if the operands are
7237 if (STACK_REG_P (x))
7240 /* Likewise if using Intel opcodes. */
7241 if (ASSEMBLER_DIALECT == ASM_INTEL)
7244 /* This is the size of op from size of operand. */
7245 switch (GET_MODE_SIZE (GET_MODE (x)))
7248 #ifdef HAVE_GAS_FILDS_FISTS
7254 if (GET_MODE (x) == SFmode)
7269 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7271 #ifdef GAS_MNEMONICS
7297 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7299 PRINT_OPERAND (file, x, 0);
7305 /* Little bit of braindamage here. The SSE compare instructions
7306 does use completely different names for the comparisons that the
7307 fp conditional moves. */
7308 switch (GET_CODE (x))
7323 fputs ("unord", file);
7327 fputs ("neq", file);
7331 fputs ("nlt", file);
7335 fputs ("nle", file);
7338 fputs ("ord", file);
7346 #ifdef CMOV_SUN_AS_SYNTAX
7347 if (ASSEMBLER_DIALECT == ASM_ATT)
7349 switch (GET_MODE (x))
7351 case HImode: putc ('w', file); break;
7353 case SFmode: putc ('l', file); break;
7355 case DFmode: putc ('q', file); break;
7363 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7366 #ifdef CMOV_SUN_AS_SYNTAX
7367 if (ASSEMBLER_DIALECT == ASM_ATT)
7370 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7373 /* Like above, but reverse condition */
7375 /* Check to see if argument to %c is really a constant
7376 and not a condition code which needs to be reversed. */
7377 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
7379 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7382 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7385 #ifdef CMOV_SUN_AS_SYNTAX
7386 if (ASSEMBLER_DIALECT == ASM_ATT)
7389 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7395 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7398 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7401 int pred_val = INTVAL (XEXP (x, 0));
7403 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7404 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7406 int taken = pred_val > REG_BR_PROB_BASE / 2;
7407 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7409 /* Emit hints only in the case default branch prediction
7410 heuristics would fail. */
7411 if (taken != cputaken)
7413 /* We use 3e (DS) prefix for taken branches and
7414 2e (CS) prefix for not taken branches. */
7416 fputs ("ds ; ", file);
7418 fputs ("cs ; ", file);
7425 output_operand_lossage ("invalid operand code `%c'", code);
7429 if (GET_CODE (x) == REG)
7431 PRINT_REG (x, code, file);
7434 else if (GET_CODE (x) == MEM)
7436 /* No `byte ptr' prefix for call instructions. */
7437 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7440 switch (GET_MODE_SIZE (GET_MODE (x)))
7442 case 1: size = "BYTE"; break;
7443 case 2: size = "WORD"; break;
7444 case 4: size = "DWORD"; break;
7445 case 8: size = "QWORD"; break;
7446 case 12: size = "XWORD"; break;
7447 case 16: size = "XMMWORD"; break;
7452 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7455 else if (code == 'w')
7457 else if (code == 'k')
7461 fputs (" PTR ", file);
7465 if (flag_pic && CONSTANT_ADDRESS_P (x))
7466 output_pic_addr_const (file, x, code);
7467 /* Avoid (%rip) for call operands. */
7468 else if (CONSTANT_ADDRESS_P (x) && code == 'P'
7469 && GET_CODE (x) != CONST_INT)
7470 output_addr_const (file, x);
7471 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7472 output_operand_lossage ("invalid constraints for operand");
7477 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7482 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7483 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7485 if (ASSEMBLER_DIALECT == ASM_ATT)
7487 fprintf (file, "0x%lx", l);
7490 /* These float cases don't actually occur as immediate operands. */
7491 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7495 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7496 fprintf (file, "%s", dstr);
7499 else if (GET_CODE (x) == CONST_DOUBLE
7500 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
7504 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7505 fprintf (file, "%s", dstr);
7512 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7514 if (ASSEMBLER_DIALECT == ASM_ATT)
7517 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7518 || GET_CODE (x) == LABEL_REF)
7520 if (ASSEMBLER_DIALECT == ASM_ATT)
7523 fputs ("OFFSET FLAT:", file);
7526 if (GET_CODE (x) == CONST_INT)
7527 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7529 output_pic_addr_const (file, x, code);
7531 output_addr_const (file, x);
7535 /* Print a memory operand whose address is ADDR. */
7538 print_operand_address (file, addr)
7542 struct ix86_address parts;
7543 rtx base, index, disp;
7546 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
7548 if (ASSEMBLER_DIALECT == ASM_INTEL)
7549 fputs ("DWORD PTR ", file);
7550 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7553 fputs ("fs:0", file);
7555 fputs ("gs:0", file);
7559 if (! ix86_decompose_address (addr, &parts))
7563 index = parts.index;
7565 scale = parts.scale;
7567 if (!base && !index)
7569 /* Displacement only requires special attention. */
7571 if (GET_CODE (disp) == CONST_INT)
7573 if (ASSEMBLER_DIALECT == ASM_INTEL)
7575 if (USER_LABEL_PREFIX[0] == 0)
7577 fputs ("ds:", file);
7579 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
7582 output_pic_addr_const (file, addr, 0);
7584 output_addr_const (file, addr);
7586 /* Use one byte shorter RIP relative addressing for 64bit mode. */
7588 && ((GET_CODE (addr) == SYMBOL_REF
7589 && ! tls_symbolic_operand (addr, GET_MODE (addr)))
7590 || GET_CODE (addr) == LABEL_REF
7591 || (GET_CODE (addr) == CONST
7592 && GET_CODE (XEXP (addr, 0)) == PLUS
7593 && (GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
7594 || GET_CODE (XEXP (XEXP (addr, 0), 0)) == LABEL_REF)
7595 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)))
7596 fputs ("(%rip)", file);
7600 if (ASSEMBLER_DIALECT == ASM_ATT)
7605 output_pic_addr_const (file, disp, 0);
7606 else if (GET_CODE (disp) == LABEL_REF)
7607 output_asm_label (disp);
7609 output_addr_const (file, disp);
7614 PRINT_REG (base, 0, file);
7618 PRINT_REG (index, 0, file);
7620 fprintf (file, ",%d", scale);
7626 rtx offset = NULL_RTX;
7630 /* Pull out the offset of a symbol; print any symbol itself. */
7631 if (GET_CODE (disp) == CONST
7632 && GET_CODE (XEXP (disp, 0)) == PLUS
7633 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7635 offset = XEXP (XEXP (disp, 0), 1);
7636 disp = gen_rtx_CONST (VOIDmode,
7637 XEXP (XEXP (disp, 0), 0));
7641 output_pic_addr_const (file, disp, 0);
7642 else if (GET_CODE (disp) == LABEL_REF)
7643 output_asm_label (disp);
7644 else if (GET_CODE (disp) == CONST_INT)
7647 output_addr_const (file, disp);
7653 PRINT_REG (base, 0, file);
7656 if (INTVAL (offset) >= 0)
7658 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7662 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7669 PRINT_REG (index, 0, file);
7671 fprintf (file, "*%d", scale);
7679 output_addr_const_extra (file, x)
7685 if (GET_CODE (x) != UNSPEC)
7688 op = XVECEXP (x, 0, 0);
7689 switch (XINT (x, 1))
7691 case UNSPEC_GOTTPOFF:
7692 output_addr_const (file, op);
7693 /* FIXME: This might be @TPOFF in Sun ld. */
7694 fputs ("@GOTTPOFF", file);
7697 output_addr_const (file, op);
7698 fputs ("@TPOFF", file);
7701 output_addr_const (file, op);
7703 fputs ("@TPOFF", file);
7705 fputs ("@NTPOFF", file);
7708 output_addr_const (file, op);
7709 fputs ("@DTPOFF", file);
7711 case UNSPEC_GOTNTPOFF:
7712 output_addr_const (file, op);
7714 fputs ("@GOTTPOFF(%rip)", file);
7716 fputs ("@GOTNTPOFF", file);
7718 case UNSPEC_INDNTPOFF:
7719 output_addr_const (file, op);
7720 fputs ("@INDNTPOFF", file);
7730 /* Split one or more DImode RTL references into pairs of SImode
7731 references. The RTL can be REG, offsettable MEM, integer constant, or
7732 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7733 split and "num" is its length. lo_half and hi_half are output arrays
7734 that parallel "operands". */
7737 split_di (operands, num, lo_half, hi_half)
7740 rtx lo_half[], hi_half[];
7744 rtx op = operands[num];
7746 /* simplify_subreg refuse to split volatile memory addresses,
7747 but we still have to handle it. */
7748 if (GET_CODE (op) == MEM)
7750 lo_half[num] = adjust_address (op, SImode, 0);
7751 hi_half[num] = adjust_address (op, SImode, 4);
7755 lo_half[num] = simplify_gen_subreg (SImode, op,
7756 GET_MODE (op) == VOIDmode
7757 ? DImode : GET_MODE (op), 0);
7758 hi_half[num] = simplify_gen_subreg (SImode, op,
7759 GET_MODE (op) == VOIDmode
7760 ? DImode : GET_MODE (op), 4);
7764 /* Split one or more TImode RTL references into pairs of SImode
7765 references. The RTL can be REG, offsettable MEM, integer constant, or
7766 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7767 split and "num" is its length. lo_half and hi_half are output arrays
7768 that parallel "operands". */
7771 split_ti (operands, num, lo_half, hi_half)
7774 rtx lo_half[], hi_half[];
7778 rtx op = operands[num];
7780 /* simplify_subreg refuse to split volatile memory addresses, but we
7781 still have to handle it. */
7782 if (GET_CODE (op) == MEM)
7784 lo_half[num] = adjust_address (op, DImode, 0);
7785 hi_half[num] = adjust_address (op, DImode, 8);
7789 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7790 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7795 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7796 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7797 is the expression of the binary operation. The output may either be
7798 emitted here, or returned to the caller, like all output_* functions.
7800 There is no guarantee that the operands are the same mode, as they
7801 might be within FLOAT or FLOAT_EXTEND expressions. */
7803 #ifndef SYSV386_COMPAT
7804 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7805 wants to fix the assemblers because that causes incompatibility
7806 with gcc. No-one wants to fix gcc because that causes
7807 incompatibility with assemblers... You can use the option of
7808 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7809 #define SYSV386_COMPAT 1
7813 output_387_binary_op (insn, operands)
7817 static char buf[30];
7820 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7822 #ifdef ENABLE_CHECKING
7823 /* Even if we do not want to check the inputs, this documents input
7824 constraints. Which helps in understanding the following code. */
7825 if (STACK_REG_P (operands[0])
7826 && ((REG_P (operands[1])
7827 && REGNO (operands[0]) == REGNO (operands[1])
7828 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7829 || (REG_P (operands[2])
7830 && REGNO (operands[0]) == REGNO (operands[2])
7831 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7832 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7838 switch (GET_CODE (operands[3]))
7841 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7842 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7850 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7851 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7859 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7860 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7868 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7869 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7883 if (GET_MODE (operands[0]) == SFmode)
7884 strcat (buf, "ss\t{%2, %0|%0, %2}");
7886 strcat (buf, "sd\t{%2, %0|%0, %2}");
7891 switch (GET_CODE (operands[3]))
7895 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7897 rtx temp = operands[2];
7898 operands[2] = operands[1];
7902 /* know operands[0] == operands[1]. */
7904 if (GET_CODE (operands[2]) == MEM)
7910 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7912 if (STACK_TOP_P (operands[0]))
7913 /* How is it that we are storing to a dead operand[2]?
7914 Well, presumably operands[1] is dead too. We can't
7915 store the result to st(0) as st(0) gets popped on this
7916 instruction. Instead store to operands[2] (which I
7917 think has to be st(1)). st(1) will be popped later.
7918 gcc <= 2.8.1 didn't have this check and generated
7919 assembly code that the Unixware assembler rejected. */
7920 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7922 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7926 if (STACK_TOP_P (operands[0]))
7927 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7929 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7934 if (GET_CODE (operands[1]) == MEM)
7940 if (GET_CODE (operands[2]) == MEM)
7946 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7949 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7950 derived assemblers, confusingly reverse the direction of
7951 the operation for fsub{r} and fdiv{r} when the
7952 destination register is not st(0). The Intel assembler
7953 doesn't have this brain damage. Read !SYSV386_COMPAT to
7954 figure out what the hardware really does. */
7955 if (STACK_TOP_P (operands[0]))
7956 p = "{p\t%0, %2|rp\t%2, %0}";
7958 p = "{rp\t%2, %0|p\t%0, %2}";
7960 if (STACK_TOP_P (operands[0]))
7961 /* As above for fmul/fadd, we can't store to st(0). */
7962 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7964 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7969 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7972 if (STACK_TOP_P (operands[0]))
7973 p = "{rp\t%0, %1|p\t%1, %0}";
7975 p = "{p\t%1, %0|rp\t%0, %1}";
7977 if (STACK_TOP_P (operands[0]))
7978 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7980 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7985 if (STACK_TOP_P (operands[0]))
7987 if (STACK_TOP_P (operands[1]))
7988 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7990 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7993 else if (STACK_TOP_P (operands[1]))
7996 p = "{\t%1, %0|r\t%0, %1}";
7998 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8004 p = "{r\t%2, %0|\t%0, %2}";
8006 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8019 /* Output code to initialize control word copies used by
8020 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
8021 is set to control word rounding downwards. */
8023 emit_i387_cw_initialization (normal, round_down)
8024 rtx normal, round_down;
8026 rtx reg = gen_reg_rtx (HImode);
8028 emit_insn (gen_x86_fnstcw_1 (normal));
8029 emit_move_insn (reg, normal);
8030 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
8032 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8034 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
8035 emit_move_insn (round_down, reg);
8038 /* Output code for INSN to convert a float to a signed int. OPERANDS
8039 are the insn operands. The output may be [HSD]Imode and the input
8040 operand may be [SDX]Fmode. */
8043 output_fix_trunc (insn, operands)
8047 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8048 int dimode_p = GET_MODE (operands[0]) == DImode;
8050 /* Jump through a hoop or two for DImode, since the hardware has no
8051 non-popping instruction. We used to do this a different way, but
8052 that was somewhat fragile and broke with post-reload splitters. */
8053 if (dimode_p && !stack_top_dies)
8054 output_asm_insn ("fld\t%y1", operands);
8056 if (!STACK_TOP_P (operands[1]))
8059 if (GET_CODE (operands[0]) != MEM)
8062 output_asm_insn ("fldcw\t%3", operands);
8063 if (stack_top_dies || dimode_p)
8064 output_asm_insn ("fistp%z0\t%0", operands);
8066 output_asm_insn ("fist%z0\t%0", operands);
8067 output_asm_insn ("fldcw\t%2", operands);
8072 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8073 should be used and 2 when fnstsw should be used. UNORDERED_P is true
8074 when fucom should be used. */
8077 output_fp_compare (insn, operands, eflags_p, unordered_p)
8080 int eflags_p, unordered_p;
8083 rtx cmp_op0 = operands[0];
8084 rtx cmp_op1 = operands[1];
8085 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
8090 cmp_op1 = operands[2];
8094 if (GET_MODE (operands[0]) == SFmode)
8096 return "ucomiss\t{%1, %0|%0, %1}";
8098 return "comiss\t{%1, %0|%0, %1}";
8101 return "ucomisd\t{%1, %0|%0, %1}";
8103 return "comisd\t{%1, %0|%0, %1}";
8106 if (! STACK_TOP_P (cmp_op0))
8109 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8111 if (STACK_REG_P (cmp_op1)
8113 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8114 && REGNO (cmp_op1) != FIRST_STACK_REG)
8116 /* If both the top of the 387 stack dies, and the other operand
8117 is also a stack register that dies, then this must be a
8118 `fcompp' float compare */
8122 /* There is no double popping fcomi variant. Fortunately,
8123 eflags is immune from the fstp's cc clobbering. */
8125 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8127 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8135 return "fucompp\n\tfnstsw\t%0";
8137 return "fcompp\n\tfnstsw\t%0";
8150 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8152 static const char * const alt[24] =
8164 "fcomi\t{%y1, %0|%0, %y1}",
8165 "fcomip\t{%y1, %0|%0, %y1}",
8166 "fucomi\t{%y1, %0|%0, %y1}",
8167 "fucomip\t{%y1, %0|%0, %y1}",
8174 "fcom%z2\t%y2\n\tfnstsw\t%0",
8175 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8176 "fucom%z2\t%y2\n\tfnstsw\t%0",
8177 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8179 "ficom%z2\t%y2\n\tfnstsw\t%0",
8180 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8188 mask = eflags_p << 3;
8189 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
8190 mask |= unordered_p << 1;
8191 mask |= stack_top_dies;
8204 ix86_output_addr_vec_elt (file, value)
8208 const char *directive = ASM_LONG;
8213 directive = ASM_QUAD;
8219 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8223 ix86_output_addr_diff_elt (file, value, rel)
8228 fprintf (file, "%s%s%d-%s%d\n",
8229 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8230 else if (HAVE_AS_GOTOFF_IN_DATA)
8231 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8233 else if (TARGET_MACHO)
8234 fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
8235 machopic_function_base_name () + 1);
8238 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8239 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8242 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8246 ix86_expand_clear (dest)
8251 /* We play register width games, which are only valid after reload. */
8252 if (!reload_completed)
8255 /* Avoid HImode and its attendant prefix byte. */
8256 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8257 dest = gen_rtx_REG (SImode, REGNO (dest));
8259 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8261 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8262 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8264 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8265 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8271 /* X is an unchanging MEM. If it is a constant pool reference, return
8272 the constant pool rtx, else NULL. */
8275 maybe_get_pool_constant (x)
8278 x = ix86_delegitimize_address (XEXP (x, 0));
8280 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8281 return get_pool_constant (x);
8287 ix86_expand_move (mode, operands)
8288 enum machine_mode mode;
8291 int strict = (reload_in_progress || reload_completed);
8292 rtx insn, op0, op1, tmp;
8297 if (tls_symbolic_operand (op1, Pmode))
8299 op1 = legitimize_address (op1, op1, VOIDmode);
8300 if (GET_CODE (op0) == MEM)
8302 tmp = gen_reg_rtx (mode);
8303 emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
8307 else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8312 rtx temp = ((reload_in_progress
8313 || ((op0 && GET_CODE (op0) == REG)
8315 ? op0 : gen_reg_rtx (Pmode));
8316 op1 = machopic_indirect_data_reference (op1, temp);
8317 op1 = machopic_legitimize_pic_address (op1, mode,
8318 temp == op1 ? 0 : temp);
8322 if (MACHOPIC_INDIRECT)
8323 op1 = machopic_indirect_data_reference (op1, 0);
8327 insn = gen_rtx_SET (VOIDmode, op0, op1);
8331 #endif /* TARGET_MACHO */
8332 if (GET_CODE (op0) == MEM)
8333 op1 = force_reg (Pmode, op1);
8337 if (GET_CODE (temp) != REG)
8338 temp = gen_reg_rtx (Pmode);
8339 temp = legitimize_pic_address (op1, temp);
8347 if (GET_CODE (op0) == MEM
8348 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8349 || !push_operand (op0, mode))
8350 && GET_CODE (op1) == MEM)
8351 op1 = force_reg (mode, op1);
8353 if (push_operand (op0, mode)
8354 && ! general_no_elim_operand (op1, mode))
8355 op1 = copy_to_mode_reg (mode, op1);
8357 /* Force large constants in 64bit compilation into register
8358 to get them CSEed. */
8359 if (TARGET_64BIT && mode == DImode
8360 && immediate_operand (op1, mode)
8361 && !x86_64_zero_extended_value (op1)
8362 && !register_operand (op0, mode)
8363 && optimize && !reload_completed && !reload_in_progress)
8364 op1 = copy_to_mode_reg (mode, op1);
8366 if (FLOAT_MODE_P (mode))
8368 /* If we are loading a floating point constant to a register,
8369 force the value to memory now, since we'll get better code
8370 out the back end. */
8374 else if (GET_CODE (op1) == CONST_DOUBLE
8375 && register_operand (op0, mode))
8376 op1 = validize_mem (force_const_mem (mode, op1));
8380 insn = gen_rtx_SET (VOIDmode, op0, op1);
8386 ix86_expand_vector_move (mode, operands)
8387 enum machine_mode mode;
8390 /* Force constants other than zero into memory. We do not know how
8391 the instructions used to build constants modify the upper 64 bits
8392 of the register, once we have that information we may be able
8393 to handle some of them more efficiently. */
8394 if ((reload_in_progress | reload_completed) == 0
8395 && register_operand (operands[0], mode)
8396 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
8397 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
8399 /* Make operand1 a register if it isn't already. */
8401 && !register_operand (operands[0], mode)
8402 && !register_operand (operands[1], mode))
8404 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8405 emit_move_insn (operands[0], temp);
8409 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8412 /* Attempt to expand a binary operator. Make the expansion closer to the
8413 actual machine, then just general_operand, which will allow 3 separate
8414 memory references (one output, two input) in a single insn. */
8417 ix86_expand_binary_operator (code, mode, operands)
8419 enum machine_mode mode;
8422 int matching_memory;
8423 rtx src1, src2, dst, op, clob;
8429 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8430 if (GET_RTX_CLASS (code) == 'c'
8431 && (rtx_equal_p (dst, src2)
8432 || immediate_operand (src1, mode)))
8439 /* If the destination is memory, and we do not have matching source
8440 operands, do things in registers. */
8441 matching_memory = 0;
8442 if (GET_CODE (dst) == MEM)
8444 if (rtx_equal_p (dst, src1))
8445 matching_memory = 1;
8446 else if (GET_RTX_CLASS (code) == 'c'
8447 && rtx_equal_p (dst, src2))
8448 matching_memory = 2;
8450 dst = gen_reg_rtx (mode);
8453 /* Both source operands cannot be in memory. */
8454 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8456 if (matching_memory != 2)
8457 src2 = force_reg (mode, src2);
8459 src1 = force_reg (mode, src1);
8462 /* If the operation is not commutable, source 1 cannot be a constant
8463 or non-matching memory. */
8464 if ((CONSTANT_P (src1)
8465 || (!matching_memory && GET_CODE (src1) == MEM))
8466 && GET_RTX_CLASS (code) != 'c')
8467 src1 = force_reg (mode, src1);
8469 /* If optimizing, copy to regs to improve CSE */
8470 if (optimize && ! no_new_pseudos)
8472 if (GET_CODE (dst) == MEM)
8473 dst = gen_reg_rtx (mode);
8474 if (GET_CODE (src1) == MEM)
8475 src1 = force_reg (mode, src1);
8476 if (GET_CODE (src2) == MEM)
8477 src2 = force_reg (mode, src2);
8480 /* Emit the instruction. */
8482 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8483 if (reload_in_progress)
8485 /* Reload doesn't know about the flags register, and doesn't know that
8486 it doesn't want to clobber it. We can only do this with PLUS. */
8493 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8494 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8497 /* Fix up the destination if needed. */
8498 if (dst != operands[0])
8499 emit_move_insn (operands[0], dst);
8502 /* Return TRUE or FALSE depending on whether the binary operator meets the
8503 appropriate constraints. */
8506 ix86_binary_operator_ok (code, mode, operands)
8508 enum machine_mode mode ATTRIBUTE_UNUSED;
8511 /* Both source operands cannot be in memory. */
8512 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8514 /* If the operation is not commutable, source 1 cannot be a constant. */
8515 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
8517 /* If the destination is memory, we must have a matching source operand. */
8518 if (GET_CODE (operands[0]) == MEM
8519 && ! (rtx_equal_p (operands[0], operands[1])
8520 || (GET_RTX_CLASS (code) == 'c'
8521 && rtx_equal_p (operands[0], operands[2]))))
8523 /* If the operation is not commutable and the source 1 is memory, we must
8524 have a matching destination. */
8525 if (GET_CODE (operands[1]) == MEM
8526 && GET_RTX_CLASS (code) != 'c'
8527 && ! rtx_equal_p (operands[0], operands[1]))
8532 /* Attempt to expand a unary operator. Make the expansion closer to the
8533 actual machine, then just general_operand, which will allow 2 separate
8534 memory references (one output, one input) in a single insn. */
8537 ix86_expand_unary_operator (code, mode, operands)
8539 enum machine_mode mode;
8542 int matching_memory;
8543 rtx src, dst, op, clob;
8548 /* If the destination is memory, and we do not have matching source
8549 operands, do things in registers. */
8550 matching_memory = 0;
8551 if (GET_CODE (dst) == MEM)
8553 if (rtx_equal_p (dst, src))
8554 matching_memory = 1;
8556 dst = gen_reg_rtx (mode);
8559 /* When source operand is memory, destination must match. */
8560 if (!matching_memory && GET_CODE (src) == MEM)
8561 src = force_reg (mode, src);
8563 /* If optimizing, copy to regs to improve CSE */
8564 if (optimize && ! no_new_pseudos)
8566 if (GET_CODE (dst) == MEM)
8567 dst = gen_reg_rtx (mode);
8568 if (GET_CODE (src) == MEM)
8569 src = force_reg (mode, src);
8572 /* Emit the instruction. */
8574 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8575 if (reload_in_progress || code == NOT)
8577 /* Reload doesn't know about the flags register, and doesn't know that
8578 it doesn't want to clobber it. */
8585 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8586 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8589 /* Fix up the destination if needed. */
8590 if (dst != operands[0])
8591 emit_move_insn (operands[0], dst);
8594 /* Return TRUE or FALSE depending on whether the unary operator meets the
8595 appropriate constraints. */
8598 ix86_unary_operator_ok (code, mode, operands)
8599 enum rtx_code code ATTRIBUTE_UNUSED;
8600 enum machine_mode mode ATTRIBUTE_UNUSED;
8601 rtx operands[2] ATTRIBUTE_UNUSED;
8603 /* If one of operands is memory, source and destination must match. */
8604 if ((GET_CODE (operands[0]) == MEM
8605 || GET_CODE (operands[1]) == MEM)
8606 && ! rtx_equal_p (operands[0], operands[1]))
8611 /* Return TRUE or FALSE depending on whether the first SET in INSN
8612 has source and destination with matching CC modes, and that the
8613 CC mode is at least as constrained as REQ_MODE. */
8616 ix86_match_ccmode (insn, req_mode)
8618 enum machine_mode req_mode;
8621 enum machine_mode set_mode;
8623 set = PATTERN (insn);
8624 if (GET_CODE (set) == PARALLEL)
8625 set = XVECEXP (set, 0, 0);
8626 if (GET_CODE (set) != SET)
8628 if (GET_CODE (SET_SRC (set)) != COMPARE)
8631 set_mode = GET_MODE (SET_DEST (set));
8635 if (req_mode != CCNOmode
8636 && (req_mode != CCmode
8637 || XEXP (SET_SRC (set), 1) != const0_rtx))
8641 if (req_mode == CCGCmode)
8645 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8649 if (req_mode == CCZmode)
8659 return (GET_MODE (SET_SRC (set)) == set_mode);
8662 /* Generate insn patterns to do an integer compare of OPERANDS. */
8665 ix86_expand_int_compare (code, op0, op1)
8669 enum machine_mode cmpmode;
8672 cmpmode = SELECT_CC_MODE (code, op0, op1);
8673 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8675 /* This is very simple, but making the interface the same as in the
8676 FP case makes the rest of the code easier. */
8677 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8678 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8680 /* Return the test that should be put into the flags user, i.e.
8681 the bcc, scc, or cmov instruction. */
8682 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8685 /* Figure out whether to use ordered or unordered fp comparisons.
8686 Return the appropriate mode to use. */
8689 ix86_fp_compare_mode (code)
8690 enum rtx_code code ATTRIBUTE_UNUSED;
8692 /* ??? In order to make all comparisons reversible, we do all comparisons
8693 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8694 all forms trapping and nontrapping comparisons, we can make inequality
8695 comparisons trapping again, since it results in better code when using
8696 FCOM based compares. */
8697 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8701 ix86_cc_mode (code, op0, op1)
8705 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8706 return ix86_fp_compare_mode (code);
8709 /* Only zero flag is needed. */
8711 case NE: /* ZF!=0 */
8713 /* Codes needing carry flag. */
8714 case GEU: /* CF=0 */
8715 case GTU: /* CF=0 & ZF=0 */
8716 case LTU: /* CF=1 */
8717 case LEU: /* CF=1 | ZF=1 */
8719 /* Codes possibly doable only with sign flag when
8720 comparing against zero. */
8721 case GE: /* SF=OF or SF=0 */
8722 case LT: /* SF<>OF or SF=1 */
8723 if (op1 == const0_rtx)
8726 /* For other cases Carry flag is not required. */
8728 /* Codes doable only with sign flag when comparing
8729 against zero, but we miss jump instruction for it
8730 so we need to use relational tests against overflow
8731 that thus needs to be zero. */
8732 case GT: /* ZF=0 & SF=OF */
8733 case LE: /* ZF=1 | SF<>OF */
8734 if (op1 == const0_rtx)
8738 /* strcmp pattern do (use flags) and combine may ask us for proper
8747 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8750 ix86_use_fcomi_compare (code)
8751 enum rtx_code code ATTRIBUTE_UNUSED;
8753 enum rtx_code swapped_code = swap_condition (code);
8754 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8755 || (ix86_fp_comparison_cost (swapped_code)
8756 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8759 /* Swap, force into registers, or otherwise massage the two operands
8760 to a fp comparison. The operands are updated in place; the new
8761 comparison code is returned. */
8763 static enum rtx_code
8764 ix86_prepare_fp_compare_args (code, pop0, pop1)
8768 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8769 rtx op0 = *pop0, op1 = *pop1;
8770 enum machine_mode op_mode = GET_MODE (op0);
8771 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8773 /* All of the unordered compare instructions only work on registers.
8774 The same is true of the XFmode compare instructions. The same is
8775 true of the fcomi compare instructions. */
8778 && (fpcmp_mode == CCFPUmode
8779 || op_mode == XFmode
8780 || op_mode == TFmode
8781 || ix86_use_fcomi_compare (code)))
8783 op0 = force_reg (op_mode, op0);
8784 op1 = force_reg (op_mode, op1);
8788 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8789 things around if they appear profitable, otherwise force op0
8792 if (standard_80387_constant_p (op0) == 0
8793 || (GET_CODE (op0) == MEM
8794 && ! (standard_80387_constant_p (op1) == 0
8795 || GET_CODE (op1) == MEM)))
8798 tmp = op0, op0 = op1, op1 = tmp;
8799 code = swap_condition (code);
8802 if (GET_CODE (op0) != REG)
8803 op0 = force_reg (op_mode, op0);
8805 if (CONSTANT_P (op1))
8807 if (standard_80387_constant_p (op1))
8808 op1 = force_reg (op_mode, op1);
8810 op1 = validize_mem (force_const_mem (op_mode, op1));
8814 /* Try to rearrange the comparison to make it cheaper. */
8815 if (ix86_fp_comparison_cost (code)
8816 > ix86_fp_comparison_cost (swap_condition (code))
8817 && (GET_CODE (op1) == REG || !no_new_pseudos))
8820 tmp = op0, op0 = op1, op1 = tmp;
8821 code = swap_condition (code);
8822 if (GET_CODE (op0) != REG)
8823 op0 = force_reg (op_mode, op0);
8831 /* Convert comparison codes we use to represent FP comparison to integer
8832 code that will result in proper branch. Return UNKNOWN if no such code
8834 static enum rtx_code
8835 ix86_fp_compare_code_to_integer (code)
8865 /* Split comparison code CODE into comparisons we can do using branch
8866 instructions. BYPASS_CODE is comparison code for branch that will
8867 branch around FIRST_CODE and SECOND_CODE. If some of branches
8868 is not required, set value to NIL.
8869 We never require more than two branches. */
8871 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
8872 enum rtx_code code, *bypass_code, *first_code, *second_code;
8878 /* The fcomi comparison sets flags as follows:
8888 case GT: /* GTU - CF=0 & ZF=0 */
8889 case GE: /* GEU - CF=0 */
8890 case ORDERED: /* PF=0 */
8891 case UNORDERED: /* PF=1 */
8892 case UNEQ: /* EQ - ZF=1 */
8893 case UNLT: /* LTU - CF=1 */
8894 case UNLE: /* LEU - CF=1 | ZF=1 */
8895 case LTGT: /* EQ - ZF=0 */
8897 case LT: /* LTU - CF=1 - fails on unordered */
8899 *bypass_code = UNORDERED;
8901 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8903 *bypass_code = UNORDERED;
8905 case EQ: /* EQ - ZF=1 - fails on unordered */
8907 *bypass_code = UNORDERED;
8909 case NE: /* NE - ZF=0 - fails on unordered */
8911 *second_code = UNORDERED;
8913 case UNGE: /* GEU - CF=0 - fails on unordered */
8915 *second_code = UNORDERED;
8917 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8919 *second_code = UNORDERED;
8924 if (!TARGET_IEEE_FP)
8931 /* Return cost of comparison done fcom + arithmetics operations on AX.
8932 All following functions do use number of instructions as a cost metrics.
8933 In future this should be tweaked to compute bytes for optimize_size and
8934 take into account performance of various instructions on various CPUs. */
8936 ix86_fp_comparison_arithmetics_cost (code)
8939 if (!TARGET_IEEE_FP)
8941 /* The cost of code output by ix86_expand_fp_compare. */
8969 /* Return cost of comparison done using fcomi operation.
8970 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8972 ix86_fp_comparison_fcomi_cost (code)
8975 enum rtx_code bypass_code, first_code, second_code;
8976 /* Return arbitrarily high cost when instruction is not supported - this
8977 prevents gcc from using it. */
8980 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8981 return (bypass_code != NIL || second_code != NIL) + 2;
8984 /* Return cost of comparison done using sahf operation.
8985 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8987 ix86_fp_comparison_sahf_cost (code)
8990 enum rtx_code bypass_code, first_code, second_code;
8991 /* Return arbitrarily high cost when instruction is not preferred - this
8992 avoids gcc from using it. */
8993 if (!TARGET_USE_SAHF && !optimize_size)
8995 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8996 return (bypass_code != NIL || second_code != NIL) + 3;
8999 /* Compute cost of the comparison done using any method.
9000 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9002 ix86_fp_comparison_cost (code)
9005 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
9008 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
9009 sahf_cost = ix86_fp_comparison_sahf_cost (code);
9011 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
9012 if (min > sahf_cost)
9014 if (min > fcomi_cost)
9019 /* Generate insn patterns to do a floating point compare of OPERANDS. */
9022 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
9024 rtx op0, op1, scratch;
9028 enum machine_mode fpcmp_mode, intcmp_mode;
9030 int cost = ix86_fp_comparison_cost (code);
9031 enum rtx_code bypass_code, first_code, second_code;
9033 fpcmp_mode = ix86_fp_compare_mode (code);
9034 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
9037 *second_test = NULL_RTX;
9039 *bypass_test = NULL_RTX;
9041 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9043 /* Do fcomi/sahf based test when profitable. */
9044 if ((bypass_code == NIL || bypass_test)
9045 && (second_code == NIL || second_test)
9046 && ix86_fp_comparison_arithmetics_cost (code) > cost)
9050 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9051 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
9057 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9058 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9060 scratch = gen_reg_rtx (HImode);
9061 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9062 emit_insn (gen_x86_sahf_1 (scratch));
9065 /* The FP codes work out to act like unsigned. */
9066 intcmp_mode = fpcmp_mode;
9068 if (bypass_code != NIL)
9069 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
9070 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9072 if (second_code != NIL)
9073 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
9074 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9079 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
9080 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9081 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9083 scratch = gen_reg_rtx (HImode);
9084 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9086 /* In the unordered case, we have to check C2 for NaN's, which
9087 doesn't happen to work out to anything nice combination-wise.
9088 So do some bit twiddling on the value we've got in AH to come
9089 up with an appropriate set of condition codes. */
9091 intcmp_mode = CCNOmode;
9096 if (code == GT || !TARGET_IEEE_FP)
9098 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9103 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9104 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9105 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
9106 intcmp_mode = CCmode;
9112 if (code == LT && TARGET_IEEE_FP)
9114 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9115 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
9116 intcmp_mode = CCmode;
9121 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
9127 if (code == GE || !TARGET_IEEE_FP)
9129 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
9134 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9135 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9142 if (code == LE && TARGET_IEEE_FP)
9144 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9145 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9146 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9147 intcmp_mode = CCmode;
9152 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9158 if (code == EQ && TARGET_IEEE_FP)
9160 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9161 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9162 intcmp_mode = CCmode;
9167 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9174 if (code == NE && TARGET_IEEE_FP)
9176 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9177 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9183 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9189 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9193 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9202 /* Return the test that should be put into the flags user, i.e.
9203 the bcc, scc, or cmov instruction. */
9204 return gen_rtx_fmt_ee (code, VOIDmode,
9205 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9210 ix86_expand_compare (code, second_test, bypass_test)
9212 rtx *second_test, *bypass_test;
9215 op0 = ix86_compare_op0;
9216 op1 = ix86_compare_op1;
9219 *second_test = NULL_RTX;
9221 *bypass_test = NULL_RTX;
9223 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9224 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9225 second_test, bypass_test);
9227 ret = ix86_expand_int_compare (code, op0, op1);
9232 /* Return true if the CODE will result in nontrivial jump sequence. */
9234 ix86_fp_jump_nontrivial_p (code)
9237 enum rtx_code bypass_code, first_code, second_code;
9240 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9241 return bypass_code != NIL || second_code != NIL;
9245 ix86_expand_branch (code, label)
9251 switch (GET_MODE (ix86_compare_op0))
9257 tmp = ix86_expand_compare (code, NULL, NULL);
9258 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9259 gen_rtx_LABEL_REF (VOIDmode, label),
9261 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9271 enum rtx_code bypass_code, first_code, second_code;
9273 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9276 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9278 /* Check whether we will use the natural sequence with one jump. If
9279 so, we can expand jump early. Otherwise delay expansion by
9280 creating compound insn to not confuse optimizers. */
9281 if (bypass_code == NIL && second_code == NIL
9284 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9285 gen_rtx_LABEL_REF (VOIDmode, label),
9290 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9291 ix86_compare_op0, ix86_compare_op1);
9292 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9293 gen_rtx_LABEL_REF (VOIDmode, label),
9295 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9297 use_fcomi = ix86_use_fcomi_compare (code);
9298 vec = rtvec_alloc (3 + !use_fcomi);
9299 RTVEC_ELT (vec, 0) = tmp;
9301 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9303 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9306 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9308 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9316 /* Expand DImode branch into multiple compare+branch. */
9318 rtx lo[2], hi[2], label2;
9319 enum rtx_code code1, code2, code3;
9321 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9323 tmp = ix86_compare_op0;
9324 ix86_compare_op0 = ix86_compare_op1;
9325 ix86_compare_op1 = tmp;
9326 code = swap_condition (code);
9328 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9329 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9331 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9332 avoid two branches. This costs one extra insn, so disable when
9333 optimizing for size. */
9335 if ((code == EQ || code == NE)
9337 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9342 if (hi[1] != const0_rtx)
9343 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9344 NULL_RTX, 0, OPTAB_WIDEN);
9347 if (lo[1] != const0_rtx)
9348 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9349 NULL_RTX, 0, OPTAB_WIDEN);
9351 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9352 NULL_RTX, 0, OPTAB_WIDEN);
9354 ix86_compare_op0 = tmp;
9355 ix86_compare_op1 = const0_rtx;
9356 ix86_expand_branch (code, label);
9360 /* Otherwise, if we are doing less-than or greater-or-equal-than,
9361 op1 is a constant and the low word is zero, then we can just
9362 examine the high word. */
9364 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9367 case LT: case LTU: case GE: case GEU:
9368 ix86_compare_op0 = hi[0];
9369 ix86_compare_op1 = hi[1];
9370 ix86_expand_branch (code, label);
9376 /* Otherwise, we need two or three jumps. */
9378 label2 = gen_label_rtx ();
9381 code2 = swap_condition (code);
9382 code3 = unsigned_condition (code);
9386 case LT: case GT: case LTU: case GTU:
9389 case LE: code1 = LT; code2 = GT; break;
9390 case GE: code1 = GT; code2 = LT; break;
9391 case LEU: code1 = LTU; code2 = GTU; break;
9392 case GEU: code1 = GTU; code2 = LTU; break;
9394 case EQ: code1 = NIL; code2 = NE; break;
9395 case NE: code2 = NIL; break;
9403 * if (hi(a) < hi(b)) goto true;
9404 * if (hi(a) > hi(b)) goto false;
9405 * if (lo(a) < lo(b)) goto true;
9409 ix86_compare_op0 = hi[0];
9410 ix86_compare_op1 = hi[1];
9413 ix86_expand_branch (code1, label);
9415 ix86_expand_branch (code2, label2);
9417 ix86_compare_op0 = lo[0];
9418 ix86_compare_op1 = lo[1];
9419 ix86_expand_branch (code3, label);
9422 emit_label (label2);
9431 /* Split branch based on floating point condition. */
9433 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
9435 rtx op1, op2, target1, target2, tmp;
9438 rtx label = NULL_RTX;
9440 int bypass_probability = -1, second_probability = -1, probability = -1;
9443 if (target2 != pc_rtx)
9446 code = reverse_condition_maybe_unordered (code);
9451 condition = ix86_expand_fp_compare (code, op1, op2,
9452 tmp, &second, &bypass);
9454 if (split_branch_probability >= 0)
9456 /* Distribute the probabilities across the jumps.
9457 Assume the BYPASS and SECOND to be always test
9459 probability = split_branch_probability;
9461 /* Value of 1 is low enough to make no need for probability
9462 to be updated. Later we may run some experiments and see
9463 if unordered values are more frequent in practice. */
9465 bypass_probability = 1;
9467 second_probability = 1;
9469 if (bypass != NULL_RTX)
9471 label = gen_label_rtx ();
9472 i = emit_jump_insn (gen_rtx_SET
9474 gen_rtx_IF_THEN_ELSE (VOIDmode,
9476 gen_rtx_LABEL_REF (VOIDmode,
9479 if (bypass_probability >= 0)
9481 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9482 GEN_INT (bypass_probability),
9485 i = emit_jump_insn (gen_rtx_SET
9487 gen_rtx_IF_THEN_ELSE (VOIDmode,
9488 condition, target1, target2)));
9489 if (probability >= 0)
9491 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9492 GEN_INT (probability),
9494 if (second != NULL_RTX)
9496 i = emit_jump_insn (gen_rtx_SET
9498 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9500 if (second_probability >= 0)
9502 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9503 GEN_INT (second_probability),
9506 if (label != NULL_RTX)
9511 ix86_expand_setcc (code, dest)
9515 rtx ret, tmp, tmpreg;
9516 rtx second_test, bypass_test;
9518 if (GET_MODE (ix86_compare_op0) == DImode
9520 return 0; /* FAIL */
9522 if (GET_MODE (dest) != QImode)
9525 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9526 PUT_MODE (ret, QImode);
9531 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9532 if (bypass_test || second_test)
9534 rtx test = second_test;
9536 rtx tmp2 = gen_reg_rtx (QImode);
9543 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9545 PUT_MODE (test, QImode);
9546 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9549 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9551 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9554 return 1; /* DONE */
9557 /* Expand comparison setting or clearing carry flag. Return true when successful
9558 and set pop for the operation. */
9560 ix86_expand_carry_flag_compare (code, op0, op1, pop)
9564 enum machine_mode mode =
9565 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9567 /* Do not handle DImode compares that go trought special path. Also we can't
9568 deal with FP compares yet. This is possible to add. */
9569 if ((mode == DImode && !TARGET_64BIT))
9571 if (FLOAT_MODE_P (mode))
9573 rtx second_test = NULL, bypass_test = NULL;
9574 rtx compare_op, compare_seq;
9576 /* Shortcut: following common codes never translate into carry flag compares. */
9577 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9578 || code == ORDERED || code == UNORDERED)
9581 /* These comparisons require zero flag; swap operands so they won't. */
9582 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9588 code = swap_condition (code);
9591 /* Try to expand the comparsion and verify that we end up with carry flag
9592 based comparsion. This is fails to be true only when we decide to expand
9593 comparsion using arithmetic that is not too common scenario. */
9595 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9596 &second_test, &bypass_test);
9597 compare_seq = get_insns ();
9600 if (second_test || bypass_test)
9602 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9603 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9604 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9606 code = GET_CODE (compare_op);
9607 if (code != LTU && code != GEU)
9609 emit_insn (compare_seq);
9613 if (!INTEGRAL_MODE_P (mode))
9621 /* Convert a==0 into (unsigned)a<1. */
9624 if (op1 != const0_rtx)
9627 code = (code == EQ ? LTU : GEU);
9630 /* Convert a>b into b<a or a>=b-1. */
9633 if (GET_CODE (op1) == CONST_INT)
9635 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9636 /* Bail out on overflow. We still can swap operands but that
9637 would force loading of the constant into register. */
9638 if (op1 == const0_rtx
9639 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9641 code = (code == GTU ? GEU : LTU);
9648 code = (code == GTU ? LTU : GEU);
9652 /* Convert a>0 into (unsigned)a<0x7fffffff. */
9655 if (mode == DImode || op1 != const0_rtx)
9657 op1 = gen_int_mode (~(1 << (GET_MODE_BITSIZE (mode) - 1)), mode);
9658 code = (code == LT ? GEU : LTU);
9662 if (mode == DImode || op1 != constm1_rtx)
9664 op1 = gen_int_mode (~(1 << (GET_MODE_BITSIZE (mode) - 1)), mode);
9665 code = (code == LE ? GEU : LTU);
9671 ix86_compare_op0 = op0;
9672 ix86_compare_op1 = op1;
9673 *pop = ix86_expand_compare (code, NULL, NULL);
9674 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9680 ix86_expand_int_movcc (operands)
9683 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9684 rtx compare_seq, compare_op;
9685 rtx second_test, bypass_test;
9686 enum machine_mode mode = GET_MODE (operands[0]);
9687 bool sign_bit_compare_p = false;;
9690 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9691 compare_seq = get_insns ();
9694 compare_code = GET_CODE (compare_op);
9696 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9697 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9698 sign_bit_compare_p = true;
9700 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9701 HImode insns, we'd be swallowed in word prefix ops. */
9703 if ((mode != HImode || TARGET_FAST_PREFIX)
9704 && (mode != DImode || TARGET_64BIT)
9705 && GET_CODE (operands[2]) == CONST_INT
9706 && GET_CODE (operands[3]) == CONST_INT)
9708 rtx out = operands[0];
9709 HOST_WIDE_INT ct = INTVAL (operands[2]);
9710 HOST_WIDE_INT cf = INTVAL (operands[3]);
9714 /* Sign bit compares are better done using shifts than we do by using
9716 if (sign_bit_compare_p
9717 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9718 ix86_compare_op1, &compare_op))
9720 /* Detect overlap between destination and compare sources. */
9723 if (!sign_bit_compare_p)
9727 compare_code = GET_CODE (compare_op);
9729 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9730 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9733 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9736 /* To simplify rest of code, restrict to the GEU case. */
9737 if (compare_code == LTU)
9739 HOST_WIDE_INT tmp = ct;
9742 compare_code = reverse_condition (compare_code);
9743 code = reverse_condition (code);
9748 PUT_CODE (compare_op,
9749 reverse_condition_maybe_unordered
9750 (GET_CODE (compare_op)));
9752 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9756 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9757 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9758 tmp = gen_reg_rtx (mode);
9761 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9763 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9767 if (code == GT || code == GE)
9768 code = reverse_condition (code);
9771 HOST_WIDE_INT tmp = ct;
9776 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9777 ix86_compare_op1, VOIDmode, 0, -1);
9790 tmp = expand_simple_binop (mode, PLUS,
9792 copy_rtx (tmp), 1, OPTAB_DIRECT);
9803 tmp = expand_simple_binop (mode, IOR,
9805 copy_rtx (tmp), 1, OPTAB_DIRECT);
9807 else if (diff == -1 && ct)
9817 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9819 tmp = expand_simple_binop (mode, PLUS,
9820 copy_rtx (tmp), GEN_INT (cf),
9821 copy_rtx (tmp), 1, OPTAB_DIRECT);
9829 * andl cf - ct, dest
9839 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9842 tmp = expand_simple_binop (mode, AND,
9844 gen_int_mode (cf - ct, mode),
9845 copy_rtx (tmp), 1, OPTAB_DIRECT);
9847 tmp = expand_simple_binop (mode, PLUS,
9848 copy_rtx (tmp), GEN_INT (ct),
9849 copy_rtx (tmp), 1, OPTAB_DIRECT);
9852 if (!rtx_equal_p (tmp, out))
9853 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9855 return 1; /* DONE */
9861 tmp = ct, ct = cf, cf = tmp;
9863 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9865 /* We may be reversing unordered compare to normal compare, that
9866 is not valid in general (we may convert non-trapping condition
9867 to trapping one), however on i386 we currently emit all
9868 comparisons unordered. */
9869 compare_code = reverse_condition_maybe_unordered (compare_code);
9870 code = reverse_condition_maybe_unordered (code);
9874 compare_code = reverse_condition (compare_code);
9875 code = reverse_condition (code);
9880 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9881 && GET_CODE (ix86_compare_op1) == CONST_INT)
9883 if (ix86_compare_op1 == const0_rtx
9884 && (code == LT || code == GE))
9885 compare_code = code;
9886 else if (ix86_compare_op1 == constm1_rtx)
9890 else if (code == GT)
9895 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9896 if (compare_code != NIL
9897 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9898 && (cf == -1 || ct == -1))
9900 /* If lea code below could be used, only optimize
9901 if it results in a 2 insn sequence. */
9903 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9904 || diff == 3 || diff == 5 || diff == 9)
9905 || (compare_code == LT && ct == -1)
9906 || (compare_code == GE && cf == -1))
9909 * notl op1 (if necessary)
9917 code = reverse_condition (code);
9920 out = emit_store_flag (out, code, ix86_compare_op0,
9921 ix86_compare_op1, VOIDmode, 0, -1);
9923 out = expand_simple_binop (mode, IOR,
9925 out, 1, OPTAB_DIRECT);
9926 if (out != operands[0])
9927 emit_move_insn (operands[0], out);
9929 return 1; /* DONE */
9934 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9935 || diff == 3 || diff == 5 || diff == 9)
9936 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9937 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
9943 * lea cf(dest*(ct-cf)),dest
9947 * This also catches the degenerate setcc-only case.
9953 out = emit_store_flag (out, code, ix86_compare_op0,
9954 ix86_compare_op1, VOIDmode, 0, 1);
9957 /* On x86_64 the lea instruction operates on Pmode, so we need
9958 to get arithmetics done in proper mode to match. */
9960 tmp = copy_rtx (out);
9964 out1 = copy_rtx (out);
9965 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9969 tmp = gen_rtx_PLUS (mode, tmp, out1);
9975 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9978 if (!rtx_equal_p (tmp, out))
9981 out = force_operand (tmp, copy_rtx (out));
9983 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
9985 if (!rtx_equal_p (out, operands[0]))
9986 emit_move_insn (operands[0], copy_rtx (out));
9988 return 1; /* DONE */
9992 * General case: Jumpful:
9993 * xorl dest,dest cmpl op1, op2
9994 * cmpl op1, op2 movl ct, dest
9996 * decl dest movl cf, dest
9997 * andl (cf-ct),dest 1:
10000 * Size 20. Size 14.
10002 * This is reasonably steep, but branch mispredict costs are
10003 * high on modern cpus, so consider failing only if optimizing
10007 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10008 && BRANCH_COST >= 2)
10014 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10015 /* We may be reversing unordered compare to normal compare,
10016 that is not valid in general (we may convert non-trapping
10017 condition to trapping one), however on i386 we currently
10018 emit all comparisons unordered. */
10019 code = reverse_condition_maybe_unordered (code);
10022 code = reverse_condition (code);
10023 if (compare_code != NIL)
10024 compare_code = reverse_condition (compare_code);
10028 if (compare_code != NIL)
10030 /* notl op1 (if needed)
10035 For x < 0 (resp. x <= -1) there will be no notl,
10036 so if possible swap the constants to get rid of the
10038 True/false will be -1/0 while code below (store flag
10039 followed by decrement) is 0/-1, so the constants need
10040 to be exchanged once more. */
10042 if (compare_code == GE || !cf)
10044 code = reverse_condition (code);
10049 HOST_WIDE_INT tmp = cf;
10054 out = emit_store_flag (out, code, ix86_compare_op0,
10055 ix86_compare_op1, VOIDmode, 0, -1);
10059 out = emit_store_flag (out, code, ix86_compare_op0,
10060 ix86_compare_op1, VOIDmode, 0, 1);
10062 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
10063 copy_rtx (out), 1, OPTAB_DIRECT);
10066 out = expand_simple_binop (mode, AND, copy_rtx (out),
10067 gen_int_mode (cf - ct, mode),
10068 copy_rtx (out), 1, OPTAB_DIRECT);
10070 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
10071 copy_rtx (out), 1, OPTAB_DIRECT);
10072 if (!rtx_equal_p (out, operands[0]))
10073 emit_move_insn (operands[0], copy_rtx (out));
10075 return 1; /* DONE */
10079 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10081 /* Try a few things more with specific constants and a variable. */
10084 rtx var, orig_out, out, tmp;
10086 if (BRANCH_COST <= 2)
10087 return 0; /* FAIL */
10089 /* If one of the two operands is an interesting constant, load a
10090 constant with the above and mask it in with a logical operation. */
10092 if (GET_CODE (operands[2]) == CONST_INT)
10095 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
10096 operands[3] = constm1_rtx, op = and_optab;
10097 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
10098 operands[3] = const0_rtx, op = ior_optab;
10100 return 0; /* FAIL */
10102 else if (GET_CODE (operands[3]) == CONST_INT)
10105 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
10106 operands[2] = constm1_rtx, op = and_optab;
10107 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
10108 operands[2] = const0_rtx, op = ior_optab;
10110 return 0; /* FAIL */
10113 return 0; /* FAIL */
10115 orig_out = operands[0];
10116 tmp = gen_reg_rtx (mode);
10119 /* Recurse to get the constant loaded. */
10120 if (ix86_expand_int_movcc (operands) == 0)
10121 return 0; /* FAIL */
10123 /* Mask in the interesting variable. */
10124 out = expand_binop (mode, op, var, tmp, orig_out, 0,
10126 if (!rtx_equal_p (out, orig_out))
10127 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
10129 return 1; /* DONE */
10133 * For comparison with above,
10143 if (! nonimmediate_operand (operands[2], mode))
10144 operands[2] = force_reg (mode, operands[2]);
10145 if (! nonimmediate_operand (operands[3], mode))
10146 operands[3] = force_reg (mode, operands[3]);
10148 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10150 rtx tmp = gen_reg_rtx (mode);
10151 emit_move_insn (tmp, operands[3]);
10154 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10156 rtx tmp = gen_reg_rtx (mode);
10157 emit_move_insn (tmp, operands[2]);
10161 if (! register_operand (operands[2], VOIDmode)
10163 || ! register_operand (operands[3], VOIDmode)))
10164 operands[2] = force_reg (mode, operands[2]);
10167 && ! register_operand (operands[3], VOIDmode))
10168 operands[3] = force_reg (mode, operands[3]);
10170 emit_insn (compare_seq);
10171 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10172 gen_rtx_IF_THEN_ELSE (mode,
10173 compare_op, operands[2],
10176 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10177 gen_rtx_IF_THEN_ELSE (mode,
10179 copy_rtx (operands[3]),
10180 copy_rtx (operands[0]))));
10182 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10183 gen_rtx_IF_THEN_ELSE (mode,
10185 copy_rtx (operands[2]),
10186 copy_rtx (operands[0]))));
10188 return 1; /* DONE */
10192 ix86_expand_fp_movcc (operands)
10195 enum rtx_code code;
10197 rtx compare_op, second_test, bypass_test;
10199 /* For SF/DFmode conditional moves based on comparisons
10200 in same mode, we may want to use SSE min/max instructions. */
10201 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
10202 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
10203 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
10204 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
10205 && (!TARGET_IEEE_FP
10206 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
10207 /* We may be called from the post-reload splitter. */
10208 && (!REG_P (operands[0])
10209 || SSE_REG_P (operands[0])
10210 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
10212 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
10213 code = GET_CODE (operands[1]);
10215 /* See if we have (cross) match between comparison operands and
10216 conditional move operands. */
10217 if (rtx_equal_p (operands[2], op1))
10222 code = reverse_condition_maybe_unordered (code);
10224 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
10226 /* Check for min operation. */
10227 if (code == LT || code == UNLE)
10235 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10236 if (memory_operand (op0, VOIDmode))
10237 op0 = force_reg (GET_MODE (operands[0]), op0);
10238 if (GET_MODE (operands[0]) == SFmode)
10239 emit_insn (gen_minsf3 (operands[0], op0, op1));
10241 emit_insn (gen_mindf3 (operands[0], op0, op1));
10244 /* Check for max operation. */
10245 if (code == GT || code == UNGE)
10253 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10254 if (memory_operand (op0, VOIDmode))
10255 op0 = force_reg (GET_MODE (operands[0]), op0);
10256 if (GET_MODE (operands[0]) == SFmode)
10257 emit_insn (gen_maxsf3 (operands[0], op0, op1));
10259 emit_insn (gen_maxdf3 (operands[0], op0, op1));
10263 /* Manage condition to be sse_comparison_operator. In case we are
10264 in non-ieee mode, try to canonicalize the destination operand
10265 to be first in the comparison - this helps reload to avoid extra
10267 if (!sse_comparison_operator (operands[1], VOIDmode)
10268 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
10270 rtx tmp = ix86_compare_op0;
10271 ix86_compare_op0 = ix86_compare_op1;
10272 ix86_compare_op1 = tmp;
10273 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
10274 VOIDmode, ix86_compare_op0,
10277 /* Similarly try to manage result to be first operand of conditional
10278 move. We also don't support the NE comparison on SSE, so try to
10280 if ((rtx_equal_p (operands[0], operands[3])
10281 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
10282 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
10284 rtx tmp = operands[2];
10285 operands[2] = operands[3];
10287 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10288 (GET_CODE (operands[1])),
10289 VOIDmode, ix86_compare_op0,
10292 if (GET_MODE (operands[0]) == SFmode)
10293 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
10294 operands[2], operands[3],
10295 ix86_compare_op0, ix86_compare_op1));
10297 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
10298 operands[2], operands[3],
10299 ix86_compare_op0, ix86_compare_op1));
10303 /* The floating point conditional move instructions don't directly
10304 support conditions resulting from a signed integer comparison. */
10306 code = GET_CODE (operands[1]);
10307 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10309 /* The floating point conditional move instructions don't directly
10310 support signed integer comparisons. */
10312 if (!fcmov_comparison_operator (compare_op, VOIDmode))
10314 if (second_test != NULL || bypass_test != NULL)
10316 tmp = gen_reg_rtx (QImode);
10317 ix86_expand_setcc (code, tmp);
10319 ix86_compare_op0 = tmp;
10320 ix86_compare_op1 = const0_rtx;
10321 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10323 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10325 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10326 emit_move_insn (tmp, operands[3]);
10329 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10331 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10332 emit_move_insn (tmp, operands[2]);
10336 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10337 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10342 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10343 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10348 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10349 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10357 /* Expand conditional increment or decrement using adb/sbb instructions.
10358 The default case using setcc followed by the conditional move can be
10359 done by generic code. */
10361 ix86_expand_int_addcc (operands)
10364 enum rtx_code code = GET_CODE (operands[1]);
10366 rtx val = const0_rtx;
10367 bool fpcmp = false;
10368 enum machine_mode mode = GET_MODE (operands[0]);
10370 if (operands[3] != const1_rtx
10371 && operands[3] != constm1_rtx)
10373 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10374 ix86_compare_op1, &compare_op))
10376 code = GET_CODE (compare_op);
10378 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10379 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10382 code = ix86_fp_compare_code_to_integer (code);
10389 PUT_CODE (compare_op,
10390 reverse_condition_maybe_unordered
10391 (GET_CODE (compare_op)));
10393 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10395 PUT_MODE (compare_op, mode);
10397 /* Construct either adc or sbb insn. */
10398 if ((code == LTU) == (operands[3] == constm1_rtx))
10400 switch (GET_MODE (operands[0]))
10403 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10406 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10409 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10412 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10420 switch (GET_MODE (operands[0]))
10423 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10426 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10429 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10432 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10438 return 1; /* DONE */
10442 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10443 works for floating pointer parameters and nonoffsetable memories.
10444 For pushes, it returns just stack offsets; the values will be saved
10445 in the right order. Maximally three parts are generated. */
10448 ix86_split_to_parts (operand, parts, mode)
10451 enum machine_mode mode;
10456 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
10458 size = (GET_MODE_SIZE (mode) + 4) / 8;
10460 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10462 if (size < 2 || size > 3)
10465 /* Optimize constant pool reference to immediates. This is used by fp
10466 moves, that force all constants to memory to allow combining. */
10467 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10469 rtx tmp = maybe_get_pool_constant (operand);
10474 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10476 /* The only non-offsetable memories we handle are pushes. */
10477 if (! push_operand (operand, VOIDmode))
10480 operand = copy_rtx (operand);
10481 PUT_MODE (operand, Pmode);
10482 parts[0] = parts[1] = parts[2] = operand;
10484 else if (!TARGET_64BIT)
10486 if (mode == DImode)
10487 split_di (&operand, 1, &parts[0], &parts[1]);
10490 if (REG_P (operand))
10492 if (!reload_completed)
10494 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10495 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10497 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10499 else if (offsettable_memref_p (operand))
10501 operand = adjust_address (operand, SImode, 0);
10502 parts[0] = operand;
10503 parts[1] = adjust_address (operand, SImode, 4);
10505 parts[2] = adjust_address (operand, SImode, 8);
10507 else if (GET_CODE (operand) == CONST_DOUBLE)
10512 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10517 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10518 parts[2] = gen_int_mode (l[2], SImode);
10521 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10526 parts[1] = gen_int_mode (l[1], SImode);
10527 parts[0] = gen_int_mode (l[0], SImode);
10535 if (mode == TImode)
10536 split_ti (&operand, 1, &parts[0], &parts[1]);
10537 if (mode == XFmode || mode == TFmode)
10539 if (REG_P (operand))
10541 if (!reload_completed)
10543 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10544 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10546 else if (offsettable_memref_p (operand))
10548 operand = adjust_address (operand, DImode, 0);
10549 parts[0] = operand;
10550 parts[1] = adjust_address (operand, SImode, 8);
10552 else if (GET_CODE (operand) == CONST_DOUBLE)
10557 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10558 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10559 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10560 if (HOST_BITS_PER_WIDE_INT >= 64)
10563 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10564 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10567 parts[0] = immed_double_const (l[0], l[1], DImode);
10568 parts[1] = gen_int_mode (l[2], SImode);
10578 /* Emit insns to perform a move or push of DI, DF, and XF values.
10579 Return false when normal moves are needed; true when all required
10580 insns have been emitted. Operands 2-4 contain the input values
10581 int the correct order; operands 5-7 contain the output values. */
10584 ix86_split_long_move (operands)
10590 int collisions = 0;
10591 enum machine_mode mode = GET_MODE (operands[0]);
10593 /* The DFmode expanders may ask us to move double.
10594 For 64bit target this is single move. By hiding the fact
10595 here we simplify i386.md splitters. */
10596 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10598 /* Optimize constant pool reference to immediates. This is used by
10599 fp moves, that force all constants to memory to allow combining. */
10601 if (GET_CODE (operands[1]) == MEM
10602 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10603 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10604 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10605 if (push_operand (operands[0], VOIDmode))
10607 operands[0] = copy_rtx (operands[0]);
10608 PUT_MODE (operands[0], Pmode);
10611 operands[0] = gen_lowpart (DImode, operands[0]);
10612 operands[1] = gen_lowpart (DImode, operands[1]);
10613 emit_move_insn (operands[0], operands[1]);
10617 /* The only non-offsettable memory we handle is push. */
10618 if (push_operand (operands[0], VOIDmode))
10620 else if (GET_CODE (operands[0]) == MEM
10621 && ! offsettable_memref_p (operands[0]))
10624 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10625 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10627 /* When emitting push, take care for source operands on the stack. */
10628 if (push && GET_CODE (operands[1]) == MEM
10629 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10632 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10633 XEXP (part[1][2], 0));
10634 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10635 XEXP (part[1][1], 0));
10638 /* We need to do copy in the right order in case an address register
10639 of the source overlaps the destination. */
10640 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10642 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10644 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10647 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10650 /* Collision in the middle part can be handled by reordering. */
10651 if (collisions == 1 && nparts == 3
10652 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10655 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10656 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10659 /* If there are more collisions, we can't handle it by reordering.
10660 Do an lea to the last part and use only one colliding move. */
10661 else if (collisions > 1)
10664 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
10665 XEXP (part[1][0], 0)));
10666 part[1][0] = change_address (part[1][0],
10667 TARGET_64BIT ? DImode : SImode,
10668 part[0][nparts - 1]);
10669 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
10671 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
10681 /* We use only first 12 bytes of TFmode value, but for pushing we
10682 are required to adjust stack as if we were pushing real 16byte
10684 if (mode == TFmode && !TARGET_64BIT)
10685 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
10687 emit_move_insn (part[0][2], part[1][2]);
10692 /* In 64bit mode we don't have 32bit push available. In case this is
10693 register, it is OK - we will just use larger counterpart. We also
10694 retype memory - these comes from attempt to avoid REX prefix on
10695 moving of second half of TFmode value. */
10696 if (GET_MODE (part[1][1]) == SImode)
10698 if (GET_CODE (part[1][1]) == MEM)
10699 part[1][1] = adjust_address (part[1][1], DImode, 0);
10700 else if (REG_P (part[1][1]))
10701 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10704 if (GET_MODE (part[1][0]) == SImode)
10705 part[1][0] = part[1][1];
10708 emit_move_insn (part[0][1], part[1][1]);
10709 emit_move_insn (part[0][0], part[1][0]);
10713 /* Choose correct order to not overwrite the source before it is copied. */
10714 if ((REG_P (part[0][0])
10715 && REG_P (part[1][1])
10716 && (REGNO (part[0][0]) == REGNO (part[1][1])
10718 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10720 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10724 operands[2] = part[0][2];
10725 operands[3] = part[0][1];
10726 operands[4] = part[0][0];
10727 operands[5] = part[1][2];
10728 operands[6] = part[1][1];
10729 operands[7] = part[1][0];
10733 operands[2] = part[0][1];
10734 operands[3] = part[0][0];
10735 operands[5] = part[1][1];
10736 operands[6] = part[1][0];
10743 operands[2] = part[0][0];
10744 operands[3] = part[0][1];
10745 operands[4] = part[0][2];
10746 operands[5] = part[1][0];
10747 operands[6] = part[1][1];
10748 operands[7] = part[1][2];
10752 operands[2] = part[0][0];
10753 operands[3] = part[0][1];
10754 operands[5] = part[1][0];
10755 operands[6] = part[1][1];
10758 emit_move_insn (operands[2], operands[5]);
10759 emit_move_insn (operands[3], operands[6]);
10761 emit_move_insn (operands[4], operands[7]);
10767 ix86_split_ashldi (operands, scratch)
10768 rtx *operands, scratch;
10770 rtx low[2], high[2];
10773 if (GET_CODE (operands[2]) == CONST_INT)
10775 split_di (operands, 2, low, high);
10776 count = INTVAL (operands[2]) & 63;
10780 emit_move_insn (high[0], low[1]);
10781 emit_move_insn (low[0], const0_rtx);
10784 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10788 if (!rtx_equal_p (operands[0], operands[1]))
10789 emit_move_insn (operands[0], operands[1]);
10790 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10791 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10796 if (!rtx_equal_p (operands[0], operands[1]))
10797 emit_move_insn (operands[0], operands[1]);
10799 split_di (operands, 1, low, high);
10801 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10802 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10804 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10806 if (! no_new_pseudos)
10807 scratch = force_reg (SImode, const0_rtx);
10809 emit_move_insn (scratch, const0_rtx);
10811 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10815 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10820 ix86_split_ashrdi (operands, scratch)
10821 rtx *operands, scratch;
10823 rtx low[2], high[2];
10826 if (GET_CODE (operands[2]) == CONST_INT)
10828 split_di (operands, 2, low, high);
10829 count = INTVAL (operands[2]) & 63;
10833 emit_move_insn (low[0], high[1]);
10835 if (! reload_completed)
10836 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10839 emit_move_insn (high[0], low[0]);
10840 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10844 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10848 if (!rtx_equal_p (operands[0], operands[1]))
10849 emit_move_insn (operands[0], operands[1]);
10850 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10851 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10856 if (!rtx_equal_p (operands[0], operands[1]))
10857 emit_move_insn (operands[0], operands[1]);
10859 split_di (operands, 1, low, high);
10861 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10862 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10864 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10866 if (! no_new_pseudos)
10867 scratch = gen_reg_rtx (SImode);
10868 emit_move_insn (scratch, high[0]);
10869 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10870 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10874 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10879 ix86_split_lshrdi (operands, scratch)
10880 rtx *operands, scratch;
10882 rtx low[2], high[2];
10885 if (GET_CODE (operands[2]) == CONST_INT)
10887 split_di (operands, 2, low, high);
10888 count = INTVAL (operands[2]) & 63;
10892 emit_move_insn (low[0], high[1]);
10893 emit_move_insn (high[0], const0_rtx);
10896 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10900 if (!rtx_equal_p (operands[0], operands[1]))
10901 emit_move_insn (operands[0], operands[1]);
10902 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10903 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10908 if (!rtx_equal_p (operands[0], operands[1]))
10909 emit_move_insn (operands[0], operands[1]);
10911 split_di (operands, 1, low, high);
10913 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10914 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10916 /* Heh. By reversing the arguments, we can reuse this pattern. */
10917 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10919 if (! no_new_pseudos)
10920 scratch = force_reg (SImode, const0_rtx);
10922 emit_move_insn (scratch, const0_rtx);
10924 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10928 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10932 /* Helper function for the string operations below. Dest VARIABLE whether
10933 it is aligned to VALUE bytes. If true, jump to the label. */
10935 ix86_expand_aligntest (variable, value)
10939 rtx label = gen_label_rtx ();
10940 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10941 if (GET_MODE (variable) == DImode)
10942 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10944 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10945 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10950 /* Adjust COUNTER by the VALUE. */
10952 ix86_adjust_counter (countreg, value)
10954 HOST_WIDE_INT value;
10956 if (GET_MODE (countreg) == DImode)
10957 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10959 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10962 /* Zero extend possibly SImode EXP to Pmode register. */
10964 ix86_zero_extend_to_Pmode (exp)
10968 if (GET_MODE (exp) == VOIDmode)
10969 return force_reg (Pmode, exp);
10970 if (GET_MODE (exp) == Pmode)
10971 return copy_to_mode_reg (Pmode, exp);
10972 r = gen_reg_rtx (Pmode);
10973 emit_insn (gen_zero_extendsidi2 (r, exp));
10977 /* Expand string move (memcpy) operation. Use i386 string operations when
10978 profitable. expand_clrstr contains similar code. */
10980 ix86_expand_movstr (dst, src, count_exp, align_exp)
10981 rtx dst, src, count_exp, align_exp;
10983 rtx srcreg, destreg, countreg;
10984 enum machine_mode counter_mode;
10985 HOST_WIDE_INT align = 0;
10986 unsigned HOST_WIDE_INT count = 0;
10989 if (GET_CODE (align_exp) == CONST_INT)
10990 align = INTVAL (align_exp);
10992 /* Can't use any of this if the user has appropriated esi or edi. */
10993 if (global_regs[4] || global_regs[5])
10996 /* This simple hack avoids all inlining code and simplifies code below. */
10997 if (!TARGET_ALIGN_STRINGOPS)
11000 if (GET_CODE (count_exp) == CONST_INT)
11002 count = INTVAL (count_exp);
11003 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11007 /* Figure out proper mode for counter. For 32bits it is always SImode,
11008 for 64bits use SImode when possible, otherwise DImode.
11009 Set count to number of bytes copied when known at compile time. */
11010 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11011 || x86_64_zero_extended_value (count_exp))
11012 counter_mode = SImode;
11014 counter_mode = DImode;
11018 if (counter_mode != SImode && counter_mode != DImode)
11021 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11022 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11024 emit_insn (gen_cld ());
11026 /* When optimizing for size emit simple rep ; movsb instruction for
11027 counts not divisible by 4. */
11029 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11031 countreg = ix86_zero_extend_to_Pmode (count_exp);
11033 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
11034 destreg, srcreg, countreg));
11036 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
11037 destreg, srcreg, countreg));
11040 /* For constant aligned (or small unaligned) copies use rep movsl
11041 followed by code copying the rest. For PentiumPro ensure 8 byte
11042 alignment to allow rep movsl acceleration. */
11044 else if (count != 0
11046 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11047 || optimize_size || count < (unsigned int) 64))
11049 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11050 if (count & ~(size - 1))
11052 countreg = copy_to_mode_reg (counter_mode,
11053 GEN_INT ((count >> (size == 4 ? 2 : 3))
11054 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11055 countreg = ix86_zero_extend_to_Pmode (countreg);
11059 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
11060 destreg, srcreg, countreg));
11062 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
11063 destreg, srcreg, countreg));
11066 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
11067 destreg, srcreg, countreg));
11069 if (size == 8 && (count & 0x04))
11070 emit_insn (gen_strmovsi (destreg, srcreg));
11072 emit_insn (gen_strmovhi (destreg, srcreg));
11074 emit_insn (gen_strmovqi (destreg, srcreg));
11076 /* The generic code based on the glibc implementation:
11077 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
11078 allowing accelerated copying there)
11079 - copy the data using rep movsl
11080 - copy the rest. */
11085 int desired_alignment = (TARGET_PENTIUMPRO
11086 && (count == 0 || count >= (unsigned int) 260)
11087 ? 8 : UNITS_PER_WORD);
11089 /* In case we don't know anything about the alignment, default to
11090 library version, since it is usually equally fast and result in
11093 Also emit call when we know that the count is large and call overhead
11094 will not be important. */
11095 if (!TARGET_INLINE_ALL_STRINGOPS
11096 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11102 if (TARGET_SINGLE_STRINGOP)
11103 emit_insn (gen_cld ());
11105 countreg2 = gen_reg_rtx (Pmode);
11106 countreg = copy_to_mode_reg (counter_mode, count_exp);
11108 /* We don't use loops to align destination and to copy parts smaller
11109 than 4 bytes, because gcc is able to optimize such code better (in
11110 the case the destination or the count really is aligned, gcc is often
11111 able to predict the branches) and also it is friendlier to the
11112 hardware branch prediction.
11114 Using loops is beneficial for generic case, because we can
11115 handle small counts using the loops. Many CPUs (such as Athlon)
11116 have large REP prefix setup costs.
11118 This is quite costly. Maybe we can revisit this decision later or
11119 add some customizability to this code. */
11121 if (count == 0 && align < desired_alignment)
11123 label = gen_label_rtx ();
11124 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11125 LEU, 0, counter_mode, 1, label);
11129 rtx label = ix86_expand_aligntest (destreg, 1);
11130 emit_insn (gen_strmovqi (destreg, srcreg));
11131 ix86_adjust_counter (countreg, 1);
11132 emit_label (label);
11133 LABEL_NUSES (label) = 1;
11137 rtx label = ix86_expand_aligntest (destreg, 2);
11138 emit_insn (gen_strmovhi (destreg, srcreg));
11139 ix86_adjust_counter (countreg, 2);
11140 emit_label (label);
11141 LABEL_NUSES (label) = 1;
11143 if (align <= 4 && desired_alignment > 4)
11145 rtx label = ix86_expand_aligntest (destreg, 4);
11146 emit_insn (gen_strmovsi (destreg, srcreg));
11147 ix86_adjust_counter (countreg, 4);
11148 emit_label (label);
11149 LABEL_NUSES (label) = 1;
11152 if (label && desired_alignment > 4 && !TARGET_64BIT)
11154 emit_label (label);
11155 LABEL_NUSES (label) = 1;
11158 if (!TARGET_SINGLE_STRINGOP)
11159 emit_insn (gen_cld ());
11162 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11164 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
11165 destreg, srcreg, countreg2));
11169 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
11170 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
11171 destreg, srcreg, countreg2));
11176 emit_label (label);
11177 LABEL_NUSES (label) = 1;
11179 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11180 emit_insn (gen_strmovsi (destreg, srcreg));
11181 if ((align <= 4 || count == 0) && TARGET_64BIT)
11183 rtx label = ix86_expand_aligntest (countreg, 4);
11184 emit_insn (gen_strmovsi (destreg, srcreg));
11185 emit_label (label);
11186 LABEL_NUSES (label) = 1;
11188 if (align > 2 && count != 0 && (count & 2))
11189 emit_insn (gen_strmovhi (destreg, srcreg));
11190 if (align <= 2 || count == 0)
11192 rtx label = ix86_expand_aligntest (countreg, 2);
11193 emit_insn (gen_strmovhi (destreg, srcreg));
11194 emit_label (label);
11195 LABEL_NUSES (label) = 1;
11197 if (align > 1 && count != 0 && (count & 1))
11198 emit_insn (gen_strmovqi (destreg, srcreg));
11199 if (align <= 1 || count == 0)
11201 rtx label = ix86_expand_aligntest (countreg, 1);
11202 emit_insn (gen_strmovqi (destreg, srcreg));
11203 emit_label (label);
11204 LABEL_NUSES (label) = 1;
11208 insns = get_insns ();
11211 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
11216 /* Expand string clear operation (bzero). Use i386 string operations when
11217 profitable. expand_movstr contains similar code. */
11219 ix86_expand_clrstr (src, count_exp, align_exp)
11220 rtx src, count_exp, align_exp;
11222 rtx destreg, zeroreg, countreg;
11223 enum machine_mode counter_mode;
11224 HOST_WIDE_INT align = 0;
11225 unsigned HOST_WIDE_INT count = 0;
11227 if (GET_CODE (align_exp) == CONST_INT)
11228 align = INTVAL (align_exp);
11230 /* Can't use any of this if the user has appropriated esi. */
11231 if (global_regs[4])
11234 /* This simple hack avoids all inlining code and simplifies code below. */
11235 if (!TARGET_ALIGN_STRINGOPS)
11238 if (GET_CODE (count_exp) == CONST_INT)
11240 count = INTVAL (count_exp);
11241 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11244 /* Figure out proper mode for counter. For 32bits it is always SImode,
11245 for 64bits use SImode when possible, otherwise DImode.
11246 Set count to number of bytes copied when known at compile time. */
11247 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11248 || x86_64_zero_extended_value (count_exp))
11249 counter_mode = SImode;
11251 counter_mode = DImode;
11253 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11255 emit_insn (gen_cld ());
11257 /* When optimizing for size emit simple rep ; movsb instruction for
11258 counts not divisible by 4. */
11260 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11262 countreg = ix86_zero_extend_to_Pmode (count_exp);
11263 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11265 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
11266 destreg, countreg));
11268 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
11269 destreg, countreg));
11271 else if (count != 0
11273 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11274 || optimize_size || count < (unsigned int) 64))
11276 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11277 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11278 if (count & ~(size - 1))
11280 countreg = copy_to_mode_reg (counter_mode,
11281 GEN_INT ((count >> (size == 4 ? 2 : 3))
11282 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11283 countreg = ix86_zero_extend_to_Pmode (countreg);
11287 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
11288 destreg, countreg));
11290 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
11291 destreg, countreg));
11294 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
11295 destreg, countreg));
11297 if (size == 8 && (count & 0x04))
11298 emit_insn (gen_strsetsi (destreg,
11299 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11301 emit_insn (gen_strsethi (destreg,
11302 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11304 emit_insn (gen_strsetqi (destreg,
11305 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11311 /* Compute desired alignment of the string operation. */
11312 int desired_alignment = (TARGET_PENTIUMPRO
11313 && (count == 0 || count >= (unsigned int) 260)
11314 ? 8 : UNITS_PER_WORD);
11316 /* In case we don't know anything about the alignment, default to
11317 library version, since it is usually equally fast and result in
11320 Also emit call when we know that the count is large and call overhead
11321 will not be important. */
11322 if (!TARGET_INLINE_ALL_STRINGOPS
11323 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11326 if (TARGET_SINGLE_STRINGOP)
11327 emit_insn (gen_cld ());
11329 countreg2 = gen_reg_rtx (Pmode);
11330 countreg = copy_to_mode_reg (counter_mode, count_exp);
11331 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11333 if (count == 0 && align < desired_alignment)
11335 label = gen_label_rtx ();
11336 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11337 LEU, 0, counter_mode, 1, label);
11341 rtx label = ix86_expand_aligntest (destreg, 1);
11342 emit_insn (gen_strsetqi (destreg,
11343 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11344 ix86_adjust_counter (countreg, 1);
11345 emit_label (label);
11346 LABEL_NUSES (label) = 1;
11350 rtx label = ix86_expand_aligntest (destreg, 2);
11351 emit_insn (gen_strsethi (destreg,
11352 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11353 ix86_adjust_counter (countreg, 2);
11354 emit_label (label);
11355 LABEL_NUSES (label) = 1;
11357 if (align <= 4 && desired_alignment > 4)
11359 rtx label = ix86_expand_aligntest (destreg, 4);
11360 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
11361 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11363 ix86_adjust_counter (countreg, 4);
11364 emit_label (label);
11365 LABEL_NUSES (label) = 1;
11368 if (label && desired_alignment > 4 && !TARGET_64BIT)
11370 emit_label (label);
11371 LABEL_NUSES (label) = 1;
11375 if (!TARGET_SINGLE_STRINGOP)
11376 emit_insn (gen_cld ());
11379 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11381 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
11382 destreg, countreg2));
11386 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
11387 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
11388 destreg, countreg2));
11392 emit_label (label);
11393 LABEL_NUSES (label) = 1;
11396 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11397 emit_insn (gen_strsetsi (destreg,
11398 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11399 if (TARGET_64BIT && (align <= 4 || count == 0))
11401 rtx label = ix86_expand_aligntest (countreg, 4);
11402 emit_insn (gen_strsetsi (destreg,
11403 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11404 emit_label (label);
11405 LABEL_NUSES (label) = 1;
11407 if (align > 2 && count != 0 && (count & 2))
11408 emit_insn (gen_strsethi (destreg,
11409 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11410 if (align <= 2 || count == 0)
11412 rtx label = ix86_expand_aligntest (countreg, 2);
11413 emit_insn (gen_strsethi (destreg,
11414 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11415 emit_label (label);
11416 LABEL_NUSES (label) = 1;
11418 if (align > 1 && count != 0 && (count & 1))
11419 emit_insn (gen_strsetqi (destreg,
11420 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11421 if (align <= 1 || count == 0)
11423 rtx label = ix86_expand_aligntest (countreg, 1);
11424 emit_insn (gen_strsetqi (destreg,
11425 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11426 emit_label (label);
11427 LABEL_NUSES (label) = 1;
11432 /* Expand strlen. */
11434 ix86_expand_strlen (out, src, eoschar, align)
11435 rtx out, src, eoschar, align;
11437 rtx addr, scratch1, scratch2, scratch3, scratch4;
11439 /* The generic case of strlen expander is long. Avoid it's
11440 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
11442 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11443 && !TARGET_INLINE_ALL_STRINGOPS
11445 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11448 addr = force_reg (Pmode, XEXP (src, 0));
11449 scratch1 = gen_reg_rtx (Pmode);
11451 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11454 /* Well it seems that some optimizer does not combine a call like
11455 foo(strlen(bar), strlen(bar));
11456 when the move and the subtraction is done here. It does calculate
11457 the length just once when these instructions are done inside of
11458 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
11459 often used and I use one fewer register for the lifetime of
11460 output_strlen_unroll() this is better. */
11462 emit_move_insn (out, addr);
11464 ix86_expand_strlensi_unroll_1 (out, align);
11466 /* strlensi_unroll_1 returns the address of the zero at the end of
11467 the string, like memchr(), so compute the length by subtracting
11468 the start address. */
11470 emit_insn (gen_subdi3 (out, out, addr));
11472 emit_insn (gen_subsi3 (out, out, addr));
11476 scratch2 = gen_reg_rtx (Pmode);
11477 scratch3 = gen_reg_rtx (Pmode);
11478 scratch4 = force_reg (Pmode, constm1_rtx);
11480 emit_move_insn (scratch3, addr);
11481 eoschar = force_reg (QImode, eoschar);
11483 emit_insn (gen_cld ());
11486 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
11487 align, scratch4, scratch3));
11488 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11489 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11493 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
11494 align, scratch4, scratch3));
11495 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11496 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11502 /* Expand the appropriate insns for doing strlen if not just doing
11505 out = result, initialized with the start address
11506 align_rtx = alignment of the address.
11507 scratch = scratch register, initialized with the startaddress when
11508 not aligned, otherwise undefined
11510 This is just the body. It needs the initialisations mentioned above and
11511 some address computing at the end. These things are done in i386.md. */
11514 ix86_expand_strlensi_unroll_1 (out, align_rtx)
11515 rtx out, align_rtx;
11519 rtx align_2_label = NULL_RTX;
11520 rtx align_3_label = NULL_RTX;
11521 rtx align_4_label = gen_label_rtx ();
11522 rtx end_0_label = gen_label_rtx ();
11524 rtx tmpreg = gen_reg_rtx (SImode);
11525 rtx scratch = gen_reg_rtx (SImode);
11529 if (GET_CODE (align_rtx) == CONST_INT)
11530 align = INTVAL (align_rtx);
11532 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11534 /* Is there a known alignment and is it less than 4? */
11537 rtx scratch1 = gen_reg_rtx (Pmode);
11538 emit_move_insn (scratch1, out);
11539 /* Is there a known alignment and is it not 2? */
11542 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11543 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11545 /* Leave just the 3 lower bits. */
11546 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11547 NULL_RTX, 0, OPTAB_WIDEN);
11549 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11550 Pmode, 1, align_4_label);
11551 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
11552 Pmode, 1, align_2_label);
11553 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
11554 Pmode, 1, align_3_label);
11558 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11559 check if is aligned to 4 - byte. */
11561 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
11562 NULL_RTX, 0, OPTAB_WIDEN);
11564 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11565 Pmode, 1, align_4_label);
11568 mem = gen_rtx_MEM (QImode, out);
11570 /* Now compare the bytes. */
11572 /* Compare the first n unaligned byte on a byte per byte basis. */
11573 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11574 QImode, 1, end_0_label);
11576 /* Increment the address. */
11578 emit_insn (gen_adddi3 (out, out, const1_rtx));
11580 emit_insn (gen_addsi3 (out, out, const1_rtx));
11582 /* Not needed with an alignment of 2 */
11585 emit_label (align_2_label);
11587 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11591 emit_insn (gen_adddi3 (out, out, const1_rtx));
11593 emit_insn (gen_addsi3 (out, out, const1_rtx));
11595 emit_label (align_3_label);
11598 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11602 emit_insn (gen_adddi3 (out, out, const1_rtx));
11604 emit_insn (gen_addsi3 (out, out, const1_rtx));
11607 /* Generate loop to check 4 bytes at a time. It is not a good idea to
11608 align this loop. It gives only huge programs, but does not help to
11610 emit_label (align_4_label);
11612 mem = gen_rtx_MEM (SImode, out);
11613 emit_move_insn (scratch, mem);
11615 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11617 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11619 /* This formula yields a nonzero result iff one of the bytes is zero.
11620 This saves three branches inside loop and many cycles. */
11622 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11623 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11624 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11625 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11626 gen_int_mode (0x80808080, SImode)));
11627 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11632 rtx reg = gen_reg_rtx (SImode);
11633 rtx reg2 = gen_reg_rtx (Pmode);
11634 emit_move_insn (reg, tmpreg);
11635 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11637 /* If zero is not in the first two bytes, move two bytes forward. */
11638 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11639 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11640 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11641 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11642 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11645 /* Emit lea manually to avoid clobbering of flags. */
11646 emit_insn (gen_rtx_SET (SImode, reg2,
11647 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
11649 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11650 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11651 emit_insn (gen_rtx_SET (VOIDmode, out,
11652 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11659 rtx end_2_label = gen_label_rtx ();
11660 /* Is zero in the first two bytes? */
11662 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11663 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11664 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11665 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11666 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11668 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11669 JUMP_LABEL (tmp) = end_2_label;
11671 /* Not in the first two. Move two bytes forward. */
11672 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11674 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
11676 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
11678 emit_label (end_2_label);
11682 /* Avoid branch in fixing the byte. */
11683 tmpreg = gen_lowpart (QImode, tmpreg);
11684 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11685 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11687 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11689 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
11691 emit_label (end_0_label);
11695 ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop, sibcall)
11696 rtx retval, fnaddr, callarg1, callarg2, pop;
11699 rtx use = NULL, call;
11701 if (pop == const0_rtx)
11703 if (TARGET_64BIT && pop)
11707 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11708 fnaddr = machopic_indirect_call_target (fnaddr);
11710 /* Static functions and indirect calls don't need the pic register. */
11711 if (! TARGET_64BIT && flag_pic
11712 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11713 && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
11714 use_reg (&use, pic_offset_table_rtx);
11716 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11718 rtx al = gen_rtx_REG (QImode, 0);
11719 emit_move_insn (al, callarg2);
11720 use_reg (&use, al);
11722 #endif /* TARGET_MACHO */
11724 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11726 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11727 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11729 if (sibcall && TARGET_64BIT
11730 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11733 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11734 fnaddr = gen_rtx_REG (Pmode, 40);
11735 emit_move_insn (fnaddr, addr);
11736 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11739 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11741 call = gen_rtx_SET (VOIDmode, retval, call);
11744 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11745 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11746 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11749 call = emit_call_insn (call);
11751 CALL_INSN_FUNCTION_USAGE (call) = use;
11755 /* Clear stack slot assignments remembered from previous functions.
11756 This is called from INIT_EXPANDERS once before RTL is emitted for each
11759 static struct machine_function *
11760 ix86_init_machine_status ()
11762 return ggc_alloc_cleared (sizeof (struct machine_function));
11765 /* Return a MEM corresponding to a stack slot with mode MODE.
11766 Allocate a new slot if necessary.
11768 The RTL for a function can have several slots available: N is
11769 which slot to use. */
11772 assign_386_stack_local (mode, n)
11773 enum machine_mode mode;
11776 struct stack_local_entry *s;
11778 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11781 for (s = ix86_stack_locals; s; s = s->next)
11782 if (s->mode == mode && s->n == n)
11785 s = (struct stack_local_entry *)
11786 ggc_alloc (sizeof (struct stack_local_entry));
11789 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11791 s->next = ix86_stack_locals;
11792 ix86_stack_locals = s;
11796 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11798 static GTY(()) rtx ix86_tls_symbol;
11800 ix86_tls_get_addr ()
11803 if (!ix86_tls_symbol)
11805 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11806 (TARGET_GNU_TLS && !TARGET_64BIT)
11807 ? "___tls_get_addr"
11808 : "__tls_get_addr");
11811 return ix86_tls_symbol;
11814 /* Calculate the length of the memory address in the instruction
11815 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11818 memory_address_length (addr)
11821 struct ix86_address parts;
11822 rtx base, index, disp;
11825 if (GET_CODE (addr) == PRE_DEC
11826 || GET_CODE (addr) == POST_INC
11827 || GET_CODE (addr) == PRE_MODIFY
11828 || GET_CODE (addr) == POST_MODIFY)
11831 if (! ix86_decompose_address (addr, &parts))
11835 index = parts.index;
11839 /* Register Indirect. */
11840 if (base && !index && !disp)
11842 /* Special cases: ebp and esp need the two-byte modrm form. */
11843 if (addr == stack_pointer_rtx
11844 || addr == arg_pointer_rtx
11845 || addr == frame_pointer_rtx
11846 || addr == hard_frame_pointer_rtx)
11850 /* Direct Addressing. */
11851 else if (disp && !base && !index)
11856 /* Find the length of the displacement constant. */
11859 if (GET_CODE (disp) == CONST_INT
11860 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
11866 /* An index requires the two-byte modrm form. */
11874 /* Compute default value for "length_immediate" attribute. When SHORTFORM
11875 is set, expect that insn have 8bit immediate alternative. */
11877 ix86_attr_length_immediate_default (insn, shortform)
11883 extract_insn_cached (insn);
11884 for (i = recog_data.n_operands - 1; i >= 0; --i)
11885 if (CONSTANT_P (recog_data.operand[i]))
11890 && GET_CODE (recog_data.operand[i]) == CONST_INT
11891 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11895 switch (get_attr_mode (insn))
11906 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
11911 fatal_insn ("unknown insn mode", insn);
11917 /* Compute default value for "length_address" attribute. */
11919 ix86_attr_length_address_default (insn)
11923 extract_insn_cached (insn);
11924 for (i = recog_data.n_operands - 1; i >= 0; --i)
11925 if (GET_CODE (recog_data.operand[i]) == MEM)
11927 return memory_address_length (XEXP (recog_data.operand[i], 0));
11933 /* Return the maximum number of instructions a cpu can issue. */
11940 case PROCESSOR_PENTIUM:
11944 case PROCESSOR_PENTIUMPRO:
11945 case PROCESSOR_PENTIUM4:
11946 case PROCESSOR_ATHLON:
11955 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
11956 by DEP_INSN and nothing set by DEP_INSN. */
11959 ix86_flags_dependant (insn, dep_insn, insn_type)
11960 rtx insn, dep_insn;
11961 enum attr_type insn_type;
11965 /* Simplify the test for uninteresting insns. */
11966 if (insn_type != TYPE_SETCC
11967 && insn_type != TYPE_ICMOV
11968 && insn_type != TYPE_FCMOV
11969 && insn_type != TYPE_IBR)
11972 if ((set = single_set (dep_insn)) != 0)
11974 set = SET_DEST (set);
11977 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
11978 && XVECLEN (PATTERN (dep_insn), 0) == 2
11979 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
11980 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
11982 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11983 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11988 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11991 /* This test is true if the dependent insn reads the flags but
11992 not any other potentially set register. */
11993 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11996 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
12002 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
12003 address with operands set by DEP_INSN. */
12006 ix86_agi_dependant (insn, dep_insn, insn_type)
12007 rtx insn, dep_insn;
12008 enum attr_type insn_type;
12012 if (insn_type == TYPE_LEA
12015 addr = PATTERN (insn);
12016 if (GET_CODE (addr) == SET)
12018 else if (GET_CODE (addr) == PARALLEL
12019 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
12020 addr = XVECEXP (addr, 0, 0);
12023 addr = SET_SRC (addr);
12028 extract_insn_cached (insn);
12029 for (i = recog_data.n_operands - 1; i >= 0; --i)
12030 if (GET_CODE (recog_data.operand[i]) == MEM)
12032 addr = XEXP (recog_data.operand[i], 0);
12039 return modified_in_p (addr, dep_insn);
12043 ix86_adjust_cost (insn, link, dep_insn, cost)
12044 rtx insn, link, dep_insn;
12047 enum attr_type insn_type, dep_insn_type;
12048 enum attr_memory memory, dep_memory;
12050 int dep_insn_code_number;
12052 /* Anti and output dependencies have zero cost on all CPUs. */
12053 if (REG_NOTE_KIND (link) != 0)
12056 dep_insn_code_number = recog_memoized (dep_insn);
12058 /* If we can't recognize the insns, we can't really do anything. */
12059 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
12062 insn_type = get_attr_type (insn);
12063 dep_insn_type = get_attr_type (dep_insn);
12067 case PROCESSOR_PENTIUM:
12068 /* Address Generation Interlock adds a cycle of latency. */
12069 if (ix86_agi_dependant (insn, dep_insn, insn_type))
12072 /* ??? Compares pair with jump/setcc. */
12073 if (ix86_flags_dependant (insn, dep_insn, insn_type))
12076 /* Floating point stores require value to be ready one cycle earlier. */
12077 if (insn_type == TYPE_FMOV
12078 && get_attr_memory (insn) == MEMORY_STORE
12079 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12083 case PROCESSOR_PENTIUMPRO:
12084 memory = get_attr_memory (insn);
12085 dep_memory = get_attr_memory (dep_insn);
12087 /* Since we can't represent delayed latencies of load+operation,
12088 increase the cost here for non-imov insns. */
12089 if (dep_insn_type != TYPE_IMOV
12090 && dep_insn_type != TYPE_FMOV
12091 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
12094 /* INT->FP conversion is expensive. */
12095 if (get_attr_fp_int_src (dep_insn))
12098 /* There is one cycle extra latency between an FP op and a store. */
12099 if (insn_type == TYPE_FMOV
12100 && (set = single_set (dep_insn)) != NULL_RTX
12101 && (set2 = single_set (insn)) != NULL_RTX
12102 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
12103 && GET_CODE (SET_DEST (set2)) == MEM)
12106 /* Show ability of reorder buffer to hide latency of load by executing
12107 in parallel with previous instruction in case
12108 previous instruction is not needed to compute the address. */
12109 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12110 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12112 /* Claim moves to take one cycle, as core can issue one load
12113 at time and the next load can start cycle later. */
12114 if (dep_insn_type == TYPE_IMOV
12115 || dep_insn_type == TYPE_FMOV)
12123 memory = get_attr_memory (insn);
12124 dep_memory = get_attr_memory (dep_insn);
12125 /* The esp dependency is resolved before the instruction is really
12127 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
12128 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
12131 /* Since we can't represent delayed latencies of load+operation,
12132 increase the cost here for non-imov insns. */
12133 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
12134 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
12136 /* INT->FP conversion is expensive. */
12137 if (get_attr_fp_int_src (dep_insn))
12140 /* Show ability of reorder buffer to hide latency of load by executing
12141 in parallel with previous instruction in case
12142 previous instruction is not needed to compute the address. */
12143 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12144 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12146 /* Claim moves to take one cycle, as core can issue one load
12147 at time and the next load can start cycle later. */
12148 if (dep_insn_type == TYPE_IMOV
12149 || dep_insn_type == TYPE_FMOV)
12158 case PROCESSOR_ATHLON:
12160 memory = get_attr_memory (insn);
12161 dep_memory = get_attr_memory (dep_insn);
12163 /* Show ability of reorder buffer to hide latency of load by executing
12164 in parallel with previous instruction in case
12165 previous instruction is not needed to compute the address. */
12166 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12167 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12169 /* Claim moves to take one cycle, as core can issue one load
12170 at time and the next load can start cycle later. */
12171 if (dep_insn_type == TYPE_IMOV
12172 || dep_insn_type == TYPE_FMOV)
12174 else if (cost >= 3)
12189 struct ppro_sched_data
12192 int issued_this_cycle;
12196 static enum attr_ppro_uops
12197 ix86_safe_ppro_uops (insn)
12200 if (recog_memoized (insn) >= 0)
12201 return get_attr_ppro_uops (insn);
12203 return PPRO_UOPS_MANY;
12207 ix86_dump_ppro_packet (dump)
12210 if (ix86_sched_data.ppro.decode[0])
12212 fprintf (dump, "PPRO packet: %d",
12213 INSN_UID (ix86_sched_data.ppro.decode[0]));
12214 if (ix86_sched_data.ppro.decode[1])
12215 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
12216 if (ix86_sched_data.ppro.decode[2])
12217 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
12218 fputc ('\n', dump);
12222 /* We're beginning a new block. Initialize data structures as necessary. */
12225 ix86_sched_init (dump, sched_verbose, veclen)
12226 FILE *dump ATTRIBUTE_UNUSED;
12227 int sched_verbose ATTRIBUTE_UNUSED;
12228 int veclen ATTRIBUTE_UNUSED;
12230 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
12233 /* Shift INSN to SLOT, and shift everything else down. */
12236 ix86_reorder_insn (insnp, slot)
12243 insnp[0] = insnp[1];
12244 while (++insnp != slot);
12250 ix86_sched_reorder_ppro (ready, e_ready)
12255 enum attr_ppro_uops cur_uops;
12256 int issued_this_cycle;
12260 /* At this point .ppro.decode contains the state of the three
12261 decoders from last "cycle". That is, those insns that were
12262 actually independent. But here we're scheduling for the
12263 decoder, and we may find things that are decodable in the
12266 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
12267 issued_this_cycle = 0;
12270 cur_uops = ix86_safe_ppro_uops (*insnp);
12272 /* If the decoders are empty, and we've a complex insn at the
12273 head of the priority queue, let it issue without complaint. */
12274 if (decode[0] == NULL)
12276 if (cur_uops == PPRO_UOPS_MANY)
12278 decode[0] = *insnp;
12282 /* Otherwise, search for a 2-4 uop unsn to issue. */
12283 while (cur_uops != PPRO_UOPS_FEW)
12285 if (insnp == ready)
12287 cur_uops = ix86_safe_ppro_uops (*--insnp);
12290 /* If so, move it to the head of the line. */
12291 if (cur_uops == PPRO_UOPS_FEW)
12292 ix86_reorder_insn (insnp, e_ready);
12294 /* Issue the head of the queue. */
12295 issued_this_cycle = 1;
12296 decode[0] = *e_ready--;
12299 /* Look for simple insns to fill in the other two slots. */
12300 for (i = 1; i < 3; ++i)
12301 if (decode[i] == NULL)
12303 if (ready > e_ready)
12307 cur_uops = ix86_safe_ppro_uops (*insnp);
12308 while (cur_uops != PPRO_UOPS_ONE)
12310 if (insnp == ready)
12312 cur_uops = ix86_safe_ppro_uops (*--insnp);
12315 /* Found one. Move it to the head of the queue and issue it. */
12316 if (cur_uops == PPRO_UOPS_ONE)
12318 ix86_reorder_insn (insnp, e_ready);
12319 decode[i] = *e_ready--;
12320 issued_this_cycle++;
12324 /* ??? Didn't find one. Ideally, here we would do a lazy split
12325 of 2-uop insns, issue one and queue the other. */
12329 if (issued_this_cycle == 0)
12330 issued_this_cycle = 1;
12331 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
12334 /* We are about to being issuing insns for this clock cycle.
12335 Override the default sort algorithm to better slot instructions. */
12337 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
12338 FILE *dump ATTRIBUTE_UNUSED;
12339 int sched_verbose ATTRIBUTE_UNUSED;
12342 int clock_var ATTRIBUTE_UNUSED;
12344 int n_ready = *n_readyp;
12345 rtx *e_ready = ready + n_ready - 1;
12347 /* Make sure to go ahead and initialize key items in
12348 ix86_sched_data if we are not going to bother trying to
12349 reorder the ready queue. */
12352 ix86_sched_data.ppro.issued_this_cycle = 1;
12361 case PROCESSOR_PENTIUMPRO:
12362 ix86_sched_reorder_ppro (ready, e_ready);
12367 return ix86_issue_rate ();
12370 /* We are about to issue INSN. Return the number of insns left on the
12371 ready queue that can be issued this cycle. */
12374 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
12378 int can_issue_more;
12384 return can_issue_more - 1;
12386 case PROCESSOR_PENTIUMPRO:
12388 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
12390 if (uops == PPRO_UOPS_MANY)
12393 ix86_dump_ppro_packet (dump);
12394 ix86_sched_data.ppro.decode[0] = insn;
12395 ix86_sched_data.ppro.decode[1] = NULL;
12396 ix86_sched_data.ppro.decode[2] = NULL;
12398 ix86_dump_ppro_packet (dump);
12399 ix86_sched_data.ppro.decode[0] = NULL;
12401 else if (uops == PPRO_UOPS_FEW)
12404 ix86_dump_ppro_packet (dump);
12405 ix86_sched_data.ppro.decode[0] = insn;
12406 ix86_sched_data.ppro.decode[1] = NULL;
12407 ix86_sched_data.ppro.decode[2] = NULL;
12411 for (i = 0; i < 3; ++i)
12412 if (ix86_sched_data.ppro.decode[i] == NULL)
12414 ix86_sched_data.ppro.decode[i] = insn;
12422 ix86_dump_ppro_packet (dump);
12423 ix86_sched_data.ppro.decode[0] = NULL;
12424 ix86_sched_data.ppro.decode[1] = NULL;
12425 ix86_sched_data.ppro.decode[2] = NULL;
12429 return --ix86_sched_data.ppro.issued_this_cycle;
12434 ia32_use_dfa_pipeline_interface ()
12436 if (TARGET_PENTIUM || TARGET_ATHLON_K8)
12441 /* How many alternative schedules to try. This should be as wide as the
12442 scheduling freedom in the DFA, but no wider. Making this value too
12443 large results extra work for the scheduler. */
12446 ia32_multipass_dfa_lookahead ()
12448 if (ix86_tune == PROCESSOR_PENTIUM)
12455 /* Walk through INSNS and look for MEM references whose address is DSTREG or
12456 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
12460 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
12462 rtx dstref, srcref, dstreg, srcreg;
12466 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
12468 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
12472 /* Subroutine of above to actually do the updating by recursively walking
12476 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
12478 rtx dstref, srcref, dstreg, srcreg;
12480 enum rtx_code code = GET_CODE (x);
12481 const char *format_ptr = GET_RTX_FORMAT (code);
12484 if (code == MEM && XEXP (x, 0) == dstreg)
12485 MEM_COPY_ATTRIBUTES (x, dstref);
12486 else if (code == MEM && XEXP (x, 0) == srcreg)
12487 MEM_COPY_ATTRIBUTES (x, srcref);
12489 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
12491 if (*format_ptr == 'e')
12492 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
12494 else if (*format_ptr == 'E')
12495 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12496 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
12501 /* Compute the alignment given to a constant that is being placed in memory.
12502 EXP is the constant and ALIGN is the alignment that the object would
12504 The value of this function is used instead of that alignment to align
12508 ix86_constant_alignment (exp, align)
12512 if (TREE_CODE (exp) == REAL_CST)
12514 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12516 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12519 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
12526 /* Compute the alignment for a static variable.
12527 TYPE is the data type, and ALIGN is the alignment that
12528 the object would ordinarily have. The value of this function is used
12529 instead of that alignment to align the object. */
12532 ix86_data_alignment (type, align)
12536 if (AGGREGATE_TYPE_P (type)
12537 && TYPE_SIZE (type)
12538 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12539 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12540 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12543 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12544 to 16byte boundary. */
12547 if (AGGREGATE_TYPE_P (type)
12548 && TYPE_SIZE (type)
12549 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12550 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12551 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12555 if (TREE_CODE (type) == ARRAY_TYPE)
12557 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12559 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12562 else if (TREE_CODE (type) == COMPLEX_TYPE)
12565 if (TYPE_MODE (type) == DCmode && align < 64)
12567 if (TYPE_MODE (type) == XCmode && align < 128)
12570 else if ((TREE_CODE (type) == RECORD_TYPE
12571 || TREE_CODE (type) == UNION_TYPE
12572 || TREE_CODE (type) == QUAL_UNION_TYPE)
12573 && TYPE_FIELDS (type))
12575 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12577 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12580 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12581 || TREE_CODE (type) == INTEGER_TYPE)
12583 if (TYPE_MODE (type) == DFmode && align < 64)
12585 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12592 /* Compute the alignment for a local variable.
12593 TYPE is the data type, and ALIGN is the alignment that
12594 the object would ordinarily have. The value of this macro is used
12595 instead of that alignment to align the object. */
12598 ix86_local_alignment (type, align)
12602 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12603 to 16byte boundary. */
12606 if (AGGREGATE_TYPE_P (type)
12607 && TYPE_SIZE (type)
12608 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12609 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12610 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12613 if (TREE_CODE (type) == ARRAY_TYPE)
12615 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12617 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12620 else if (TREE_CODE (type) == COMPLEX_TYPE)
12622 if (TYPE_MODE (type) == DCmode && align < 64)
12624 if (TYPE_MODE (type) == XCmode && align < 128)
12627 else if ((TREE_CODE (type) == RECORD_TYPE
12628 || TREE_CODE (type) == UNION_TYPE
12629 || TREE_CODE (type) == QUAL_UNION_TYPE)
12630 && TYPE_FIELDS (type))
12632 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12634 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12637 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12638 || TREE_CODE (type) == INTEGER_TYPE)
12641 if (TYPE_MODE (type) == DFmode && align < 64)
12643 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12649 /* Emit RTL insns to initialize the variable parts of a trampoline.
12650 FNADDR is an RTX for the address of the function's pure code.
12651 CXT is an RTX for the static chain value for the function. */
12653 x86_initialize_trampoline (tramp, fnaddr, cxt)
12654 rtx tramp, fnaddr, cxt;
12658 /* Compute offset from the end of the jmp to the target function. */
12659 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12660 plus_constant (tramp, 10),
12661 NULL_RTX, 1, OPTAB_DIRECT);
12662 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12663 gen_int_mode (0xb9, QImode));
12664 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12665 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12666 gen_int_mode (0xe9, QImode));
12667 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12672 /* Try to load address using shorter movl instead of movabs.
12673 We may want to support movq for kernel mode, but kernel does not use
12674 trampolines at the moment. */
12675 if (x86_64_zero_extended_value (fnaddr))
12677 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12678 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12679 gen_int_mode (0xbb41, HImode));
12680 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12681 gen_lowpart (SImode, fnaddr));
12686 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12687 gen_int_mode (0xbb49, HImode));
12688 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12692 /* Load static chain using movabs to r10. */
12693 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12694 gen_int_mode (0xba49, HImode));
12695 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12698 /* Jump to the r11 */
12699 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12700 gen_int_mode (0xff49, HImode));
12701 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12702 gen_int_mode (0xe3, QImode));
12704 if (offset > TRAMPOLINE_SIZE)
12708 #ifdef TRANSFER_FROM_TRAMPOLINE
12709 emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
12710 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12714 #define def_builtin(MASK, NAME, TYPE, CODE) \
12716 if ((MASK) & target_flags \
12717 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
12718 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12719 NULL, NULL_TREE); \
12722 struct builtin_description
12724 const unsigned int mask;
12725 const enum insn_code icode;
12726 const char *const name;
12727 const enum ix86_builtins code;
12728 const enum rtx_code comparison;
12729 const unsigned int flag;
12732 /* Used for builtins that are enabled both by -msse and -msse2. */
12733 #define MASK_SSE1 (MASK_SSE | MASK_SSE2)
12734 #define MASK_SSE164 (MASK_SSE | MASK_SSE2 | MASK_64BIT)
12735 #define MASK_SSE264 (MASK_SSE2 | MASK_64BIT)
12737 static const struct builtin_description bdesc_comi[] =
12739 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12740 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12741 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12742 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12743 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12744 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12745 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12746 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12747 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12748 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12749 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12750 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12751 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12752 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12753 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12754 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12755 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12756 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12757 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12758 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12759 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12760 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12761 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12762 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12765 static const struct builtin_description bdesc_2arg[] =
12768 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12769 { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12770 { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12771 { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12772 { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12773 { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12774 { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12775 { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12777 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12778 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12779 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12780 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12781 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12782 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12783 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12784 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12785 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12786 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12787 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12788 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12789 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12790 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12791 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12792 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12793 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12794 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12795 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12796 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12798 { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12799 { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12800 { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12801 { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12803 { MASK_SSE1, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12804 { MASK_SSE1, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12805 { MASK_SSE1, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12806 { MASK_SSE1, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12808 { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12809 { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12810 { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12811 { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12812 { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12815 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12816 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12817 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12818 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
12819 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12820 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12821 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12822 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
12824 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12825 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12826 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12827 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12828 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12829 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12830 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12831 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12833 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12834 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12835 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12837 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12838 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12839 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12840 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12842 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12843 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12845 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12846 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12847 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12848 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12849 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12850 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12852 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12853 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12854 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12855 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12857 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12858 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12859 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12860 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12861 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12862 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12865 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12866 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12867 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12869 { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12870 { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12871 { MASK_SSE164, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
12873 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12874 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12875 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12876 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12877 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12878 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12880 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12881 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12882 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12883 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12884 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12885 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12887 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12888 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12889 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12890 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12892 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12893 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12896 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12897 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12898 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12899 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12900 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12901 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12902 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12903 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12905 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12906 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12907 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12908 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12909 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12910 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12911 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12912 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12913 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12914 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12915 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12916 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12917 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12918 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12919 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12920 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12921 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12922 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12923 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12924 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12926 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12927 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12928 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12929 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12931 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12932 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12933 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12934 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12936 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12937 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12938 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12941 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12942 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12943 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12944 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12945 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12946 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12947 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12948 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12950 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12951 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12952 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12953 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12954 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12955 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12956 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12957 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12959 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12960 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12961 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
12962 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
12964 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12965 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12966 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12967 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
12969 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12970 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12972 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12973 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12974 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12975 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12976 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12977 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12979 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12980 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12981 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12982 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12984 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12985 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12986 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
12987 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
12988 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12989 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12990 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12991 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
12993 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12994 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12995 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12997 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12998 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
13000 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
13001 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
13002 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
13003 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
13004 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
13005 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
13007 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
13008 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
13009 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
13010 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
13011 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
13012 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
13014 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
13015 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
13016 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
13017 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
13019 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
13021 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
13022 { MASK_SSE264, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
13023 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
13024 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
13027 static const struct builtin_description bdesc_1arg[] =
13029 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
13030 { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
13032 { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
13033 { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
13034 { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
13036 { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
13037 { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
13038 { MASK_SSE164, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
13039 { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
13040 { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
13041 { MASK_SSE164, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
13043 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
13044 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
13045 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
13046 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
13048 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
13050 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
13051 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
13053 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
13054 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
13055 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
13056 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
13057 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
13059 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
13061 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
13062 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
13063 { MASK_SSE264, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
13064 { MASK_SSE264, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
13066 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
13067 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
13068 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
13070 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 }
13074 ix86_init_builtins ()
13077 ix86_init_mmx_sse_builtins ();
13080 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
13081 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
13084 ix86_init_mmx_sse_builtins ()
13086 const struct builtin_description * d;
13089 tree pchar_type_node = build_pointer_type (char_type_node);
13090 tree pcchar_type_node = build_pointer_type (
13091 build_type_variant (char_type_node, 1, 0));
13092 tree pfloat_type_node = build_pointer_type (float_type_node);
13093 tree pcfloat_type_node = build_pointer_type (
13094 build_type_variant (float_type_node, 1, 0));
13095 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
13096 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
13097 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
13100 tree int_ftype_v4sf_v4sf
13101 = build_function_type_list (integer_type_node,
13102 V4SF_type_node, V4SF_type_node, NULL_TREE);
13103 tree v4si_ftype_v4sf_v4sf
13104 = build_function_type_list (V4SI_type_node,
13105 V4SF_type_node, V4SF_type_node, NULL_TREE);
13106 /* MMX/SSE/integer conversions. */
13107 tree int_ftype_v4sf
13108 = build_function_type_list (integer_type_node,
13109 V4SF_type_node, NULL_TREE);
13110 tree int64_ftype_v4sf
13111 = build_function_type_list (long_long_integer_type_node,
13112 V4SF_type_node, NULL_TREE);
13113 tree int_ftype_v8qi
13114 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
13115 tree v4sf_ftype_v4sf_int
13116 = build_function_type_list (V4SF_type_node,
13117 V4SF_type_node, integer_type_node, NULL_TREE);
13118 tree v4sf_ftype_v4sf_int64
13119 = build_function_type_list (V4SF_type_node,
13120 V4SF_type_node, long_long_integer_type_node,
13122 tree v4sf_ftype_v4sf_v2si
13123 = build_function_type_list (V4SF_type_node,
13124 V4SF_type_node, V2SI_type_node, NULL_TREE);
13125 tree int_ftype_v4hi_int
13126 = build_function_type_list (integer_type_node,
13127 V4HI_type_node, integer_type_node, NULL_TREE);
13128 tree v4hi_ftype_v4hi_int_int
13129 = build_function_type_list (V4HI_type_node, V4HI_type_node,
13130 integer_type_node, integer_type_node,
13132 /* Miscellaneous. */
13133 tree v8qi_ftype_v4hi_v4hi
13134 = build_function_type_list (V8QI_type_node,
13135 V4HI_type_node, V4HI_type_node, NULL_TREE);
13136 tree v4hi_ftype_v2si_v2si
13137 = build_function_type_list (V4HI_type_node,
13138 V2SI_type_node, V2SI_type_node, NULL_TREE);
13139 tree v4sf_ftype_v4sf_v4sf_int
13140 = build_function_type_list (V4SF_type_node,
13141 V4SF_type_node, V4SF_type_node,
13142 integer_type_node, NULL_TREE);
13143 tree v2si_ftype_v4hi_v4hi
13144 = build_function_type_list (V2SI_type_node,
13145 V4HI_type_node, V4HI_type_node, NULL_TREE);
13146 tree v4hi_ftype_v4hi_int
13147 = build_function_type_list (V4HI_type_node,
13148 V4HI_type_node, integer_type_node, NULL_TREE);
13149 tree v4hi_ftype_v4hi_di
13150 = build_function_type_list (V4HI_type_node,
13151 V4HI_type_node, long_long_unsigned_type_node,
13153 tree v2si_ftype_v2si_di
13154 = build_function_type_list (V2SI_type_node,
13155 V2SI_type_node, long_long_unsigned_type_node,
13157 tree void_ftype_void
13158 = build_function_type (void_type_node, void_list_node);
13159 tree void_ftype_unsigned
13160 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
13161 tree unsigned_ftype_void
13162 = build_function_type (unsigned_type_node, void_list_node);
13164 = build_function_type (long_long_unsigned_type_node, void_list_node);
13165 tree v4sf_ftype_void
13166 = build_function_type (V4SF_type_node, void_list_node);
13167 tree v2si_ftype_v4sf
13168 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
13169 /* Loads/stores. */
13170 tree void_ftype_v8qi_v8qi_pchar
13171 = build_function_type_list (void_type_node,
13172 V8QI_type_node, V8QI_type_node,
13173 pchar_type_node, NULL_TREE);
13174 tree v4sf_ftype_pcfloat
13175 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
13176 /* @@@ the type is bogus */
13177 tree v4sf_ftype_v4sf_pv2si
13178 = build_function_type_list (V4SF_type_node,
13179 V4SF_type_node, pv2si_type_node, NULL_TREE);
13180 tree void_ftype_pv2si_v4sf
13181 = build_function_type_list (void_type_node,
13182 pv2si_type_node, V4SF_type_node, NULL_TREE);
13183 tree void_ftype_pfloat_v4sf
13184 = build_function_type_list (void_type_node,
13185 pfloat_type_node, V4SF_type_node, NULL_TREE);
13186 tree void_ftype_pdi_di
13187 = build_function_type_list (void_type_node,
13188 pdi_type_node, long_long_unsigned_type_node,
13190 tree void_ftype_pv2di_v2di
13191 = build_function_type_list (void_type_node,
13192 pv2di_type_node, V2DI_type_node, NULL_TREE);
13193 /* Normal vector unops. */
13194 tree v4sf_ftype_v4sf
13195 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
13197 /* Normal vector binops. */
13198 tree v4sf_ftype_v4sf_v4sf
13199 = build_function_type_list (V4SF_type_node,
13200 V4SF_type_node, V4SF_type_node, NULL_TREE);
13201 tree v8qi_ftype_v8qi_v8qi
13202 = build_function_type_list (V8QI_type_node,
13203 V8QI_type_node, V8QI_type_node, NULL_TREE);
13204 tree v4hi_ftype_v4hi_v4hi
13205 = build_function_type_list (V4HI_type_node,
13206 V4HI_type_node, V4HI_type_node, NULL_TREE);
13207 tree v2si_ftype_v2si_v2si
13208 = build_function_type_list (V2SI_type_node,
13209 V2SI_type_node, V2SI_type_node, NULL_TREE);
13210 tree di_ftype_di_di
13211 = build_function_type_list (long_long_unsigned_type_node,
13212 long_long_unsigned_type_node,
13213 long_long_unsigned_type_node, NULL_TREE);
13215 tree v2si_ftype_v2sf
13216 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
13217 tree v2sf_ftype_v2si
13218 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
13219 tree v2si_ftype_v2si
13220 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
13221 tree v2sf_ftype_v2sf
13222 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
13223 tree v2sf_ftype_v2sf_v2sf
13224 = build_function_type_list (V2SF_type_node,
13225 V2SF_type_node, V2SF_type_node, NULL_TREE);
13226 tree v2si_ftype_v2sf_v2sf
13227 = build_function_type_list (V2SI_type_node,
13228 V2SF_type_node, V2SF_type_node, NULL_TREE);
13229 tree pint_type_node = build_pointer_type (integer_type_node);
13230 tree pcint_type_node = build_pointer_type (
13231 build_type_variant (integer_type_node, 1, 0));
13232 tree pdouble_type_node = build_pointer_type (double_type_node);
13233 tree pcdouble_type_node = build_pointer_type (
13234 build_type_variant (double_type_node, 1, 0));
13235 tree int_ftype_v2df_v2df
13236 = build_function_type_list (integer_type_node,
13237 V2DF_type_node, V2DF_type_node, NULL_TREE);
13240 = build_function_type (intTI_type_node, void_list_node);
13241 tree v2di_ftype_void
13242 = build_function_type (V2DI_type_node, void_list_node);
13243 tree ti_ftype_ti_ti
13244 = build_function_type_list (intTI_type_node,
13245 intTI_type_node, intTI_type_node, NULL_TREE);
13246 tree void_ftype_pcvoid
13247 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
13249 = build_function_type_list (V2DI_type_node,
13250 long_long_unsigned_type_node, NULL_TREE);
13252 = build_function_type_list (long_long_unsigned_type_node,
13253 V2DI_type_node, NULL_TREE);
13254 tree v4sf_ftype_v4si
13255 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
13256 tree v4si_ftype_v4sf
13257 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
13258 tree v2df_ftype_v4si
13259 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
13260 tree v4si_ftype_v2df
13261 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
13262 tree v2si_ftype_v2df
13263 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
13264 tree v4sf_ftype_v2df
13265 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
13266 tree v2df_ftype_v2si
13267 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
13268 tree v2df_ftype_v4sf
13269 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
13270 tree int_ftype_v2df
13271 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
13272 tree int64_ftype_v2df
13273 = build_function_type_list (long_long_integer_type_node,
13274 V2DF_type_node, NULL_TREE);
13275 tree v2df_ftype_v2df_int
13276 = build_function_type_list (V2DF_type_node,
13277 V2DF_type_node, integer_type_node, NULL_TREE);
13278 tree v2df_ftype_v2df_int64
13279 = build_function_type_list (V2DF_type_node,
13280 V2DF_type_node, long_long_integer_type_node,
13282 tree v4sf_ftype_v4sf_v2df
13283 = build_function_type_list (V4SF_type_node,
13284 V4SF_type_node, V2DF_type_node, NULL_TREE);
13285 tree v2df_ftype_v2df_v4sf
13286 = build_function_type_list (V2DF_type_node,
13287 V2DF_type_node, V4SF_type_node, NULL_TREE);
13288 tree v2df_ftype_v2df_v2df_int
13289 = build_function_type_list (V2DF_type_node,
13290 V2DF_type_node, V2DF_type_node,
13293 tree v2df_ftype_v2df_pv2si
13294 = build_function_type_list (V2DF_type_node,
13295 V2DF_type_node, pv2si_type_node, NULL_TREE);
13296 tree void_ftype_pv2si_v2df
13297 = build_function_type_list (void_type_node,
13298 pv2si_type_node, V2DF_type_node, NULL_TREE);
13299 tree void_ftype_pdouble_v2df
13300 = build_function_type_list (void_type_node,
13301 pdouble_type_node, V2DF_type_node, NULL_TREE);
13302 tree void_ftype_pint_int
13303 = build_function_type_list (void_type_node,
13304 pint_type_node, integer_type_node, NULL_TREE);
13305 tree void_ftype_v16qi_v16qi_pchar
13306 = build_function_type_list (void_type_node,
13307 V16QI_type_node, V16QI_type_node,
13308 pchar_type_node, NULL_TREE);
13309 tree v2df_ftype_pcdouble
13310 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
13311 tree v2df_ftype_v2df_v2df
13312 = build_function_type_list (V2DF_type_node,
13313 V2DF_type_node, V2DF_type_node, NULL_TREE);
13314 tree v16qi_ftype_v16qi_v16qi
13315 = build_function_type_list (V16QI_type_node,
13316 V16QI_type_node, V16QI_type_node, NULL_TREE);
13317 tree v8hi_ftype_v8hi_v8hi
13318 = build_function_type_list (V8HI_type_node,
13319 V8HI_type_node, V8HI_type_node, NULL_TREE);
13320 tree v4si_ftype_v4si_v4si
13321 = build_function_type_list (V4SI_type_node,
13322 V4SI_type_node, V4SI_type_node, NULL_TREE);
13323 tree v2di_ftype_v2di_v2di
13324 = build_function_type_list (V2DI_type_node,
13325 V2DI_type_node, V2DI_type_node, NULL_TREE);
13326 tree v2di_ftype_v2df_v2df
13327 = build_function_type_list (V2DI_type_node,
13328 V2DF_type_node, V2DF_type_node, NULL_TREE);
13329 tree v2df_ftype_v2df
13330 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13331 tree v2df_ftype_double
13332 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
13333 tree v2df_ftype_double_double
13334 = build_function_type_list (V2DF_type_node,
13335 double_type_node, double_type_node, NULL_TREE);
13336 tree int_ftype_v8hi_int
13337 = build_function_type_list (integer_type_node,
13338 V8HI_type_node, integer_type_node, NULL_TREE);
13339 tree v8hi_ftype_v8hi_int_int
13340 = build_function_type_list (V8HI_type_node,
13341 V8HI_type_node, integer_type_node,
13342 integer_type_node, NULL_TREE);
13343 tree v2di_ftype_v2di_int
13344 = build_function_type_list (V2DI_type_node,
13345 V2DI_type_node, integer_type_node, NULL_TREE);
13346 tree v4si_ftype_v4si_int
13347 = build_function_type_list (V4SI_type_node,
13348 V4SI_type_node, integer_type_node, NULL_TREE);
13349 tree v8hi_ftype_v8hi_int
13350 = build_function_type_list (V8HI_type_node,
13351 V8HI_type_node, integer_type_node, NULL_TREE);
13352 tree v8hi_ftype_v8hi_v2di
13353 = build_function_type_list (V8HI_type_node,
13354 V8HI_type_node, V2DI_type_node, NULL_TREE);
13355 tree v4si_ftype_v4si_v2di
13356 = build_function_type_list (V4SI_type_node,
13357 V4SI_type_node, V2DI_type_node, NULL_TREE);
13358 tree v4si_ftype_v8hi_v8hi
13359 = build_function_type_list (V4SI_type_node,
13360 V8HI_type_node, V8HI_type_node, NULL_TREE);
13361 tree di_ftype_v8qi_v8qi
13362 = build_function_type_list (long_long_unsigned_type_node,
13363 V8QI_type_node, V8QI_type_node, NULL_TREE);
13364 tree v2di_ftype_v16qi_v16qi
13365 = build_function_type_list (V2DI_type_node,
13366 V16QI_type_node, V16QI_type_node, NULL_TREE);
13367 tree int_ftype_v16qi
13368 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
13369 tree v16qi_ftype_pcchar
13370 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
13371 tree void_ftype_pchar_v16qi
13372 = build_function_type_list (void_type_node,
13373 pchar_type_node, V16QI_type_node, NULL_TREE);
13374 tree v4si_ftype_pcint
13375 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
13376 tree void_ftype_pcint_v4si
13377 = build_function_type_list (void_type_node,
13378 pcint_type_node, V4SI_type_node, NULL_TREE);
13379 tree v2di_ftype_v2di
13380 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
13382 /* Add all builtins that are more or less simple operations on two
13384 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13386 /* Use one of the operands; the target can have a different mode for
13387 mask-generating compares. */
13388 enum machine_mode mode;
13393 mode = insn_data[d->icode].operand[1].mode;
13398 type = v16qi_ftype_v16qi_v16qi;
13401 type = v8hi_ftype_v8hi_v8hi;
13404 type = v4si_ftype_v4si_v4si;
13407 type = v2di_ftype_v2di_v2di;
13410 type = v2df_ftype_v2df_v2df;
13413 type = ti_ftype_ti_ti;
13416 type = v4sf_ftype_v4sf_v4sf;
13419 type = v8qi_ftype_v8qi_v8qi;
13422 type = v4hi_ftype_v4hi_v4hi;
13425 type = v2si_ftype_v2si_v2si;
13428 type = di_ftype_di_di;
13435 /* Override for comparisons. */
13436 if (d->icode == CODE_FOR_maskcmpv4sf3
13437 || d->icode == CODE_FOR_maskncmpv4sf3
13438 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13439 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
13440 type = v4si_ftype_v4sf_v4sf;
13442 if (d->icode == CODE_FOR_maskcmpv2df3
13443 || d->icode == CODE_FOR_maskncmpv2df3
13444 || d->icode == CODE_FOR_vmmaskcmpv2df3
13445 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13446 type = v2di_ftype_v2df_v2df;
13448 def_builtin (d->mask, d->name, type, d->code);
13451 /* Add the remaining MMX insns with somewhat more complicated types. */
13452 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13453 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13454 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13455 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13456 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13458 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13459 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13460 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13462 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13463 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13465 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13466 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
13468 /* comi/ucomi insns. */
13469 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13470 if (d->mask == MASK_SSE2)
13471 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13473 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
13475 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13476 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13477 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13479 def_builtin (MASK_SSE1, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13480 def_builtin (MASK_SSE1, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13481 def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13482 def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13483 def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13484 def_builtin (MASK_SSE164, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
13485 def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13486 def_builtin (MASK_SSE164, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
13487 def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13488 def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13489 def_builtin (MASK_SSE164, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
13491 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13492 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13494 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13496 def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13497 def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13498 def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13499 def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13500 def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13501 def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13503 def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13504 def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13505 def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13506 def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13508 def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13509 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13510 def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13511 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13513 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13515 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13517 def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13518 def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13519 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13520 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13521 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13522 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13524 def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13526 /* Original 3DNow! */
13527 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13528 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13529 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13530 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13531 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13532 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13533 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13534 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13535 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13536 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13537 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13538 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13539 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13540 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13541 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13542 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13543 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13544 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13545 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13546 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13548 /* 3DNow! extension as used in the Athlon CPU. */
13549 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13550 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13551 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13552 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13553 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13554 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13556 def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13559 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13560 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13562 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13563 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13564 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13566 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13567 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13568 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
13569 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13570 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13571 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13573 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13574 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13575 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13576 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13578 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13579 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13580 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13581 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13582 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13584 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13585 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13586 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13587 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13589 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13590 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13592 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13594 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13595 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13597 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13598 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13599 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13600 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13601 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13603 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13605 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13606 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13607 def_builtin (MASK_SSE264, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13608 def_builtin (MASK_SSE264, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
13610 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13611 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13612 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13614 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13615 def_builtin (MASK_SSE264, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
13616 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13617 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13619 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13620 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13621 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13622 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13623 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13624 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13625 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13627 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13628 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13629 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13631 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13632 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13633 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13634 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13635 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13636 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13637 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13639 def_builtin (MASK_SSE1, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13641 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13642 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13643 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13645 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13646 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13647 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13649 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13650 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13652 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13653 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13654 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13655 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13657 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13658 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13659 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13660 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13662 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13663 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13665 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13668 /* Errors in the source file can cause expand_expr to return const0_rtx
13669 where we expect a vector. To avoid crashing, use one of the vector
13670 clear instructions. */
13672 safe_vector_operand (x, mode)
13674 enum machine_mode mode;
13676 if (x != const0_rtx)
13678 x = gen_reg_rtx (mode);
13680 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13681 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13682 : gen_rtx_SUBREG (DImode, x, 0)));
13684 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13685 : gen_rtx_SUBREG (V4SFmode, x, 0),
13686 CONST0_RTX (V4SFmode)));
13690 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
13693 ix86_expand_binop_builtin (icode, arglist, target)
13694 enum insn_code icode;
13699 tree arg0 = TREE_VALUE (arglist);
13700 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13701 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13702 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13703 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13704 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13705 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13707 if (VECTOR_MODE_P (mode0))
13708 op0 = safe_vector_operand (op0, mode0);
13709 if (VECTOR_MODE_P (mode1))
13710 op1 = safe_vector_operand (op1, mode1);
13713 || GET_MODE (target) != tmode
13714 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13715 target = gen_reg_rtx (tmode);
13717 if (GET_MODE (op1) == SImode && mode1 == TImode)
13719 rtx x = gen_reg_rtx (V4SImode);
13720 emit_insn (gen_sse2_loadd (x, op1));
13721 op1 = gen_lowpart (TImode, x);
13724 /* In case the insn wants input operands in modes different from
13725 the result, abort. */
13726 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
13729 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13730 op0 = copy_to_mode_reg (mode0, op0);
13731 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13732 op1 = copy_to_mode_reg (mode1, op1);
13734 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13735 yet one of the two must not be a memory. This is normally enforced
13736 by expanders, but we didn't bother to create one here. */
13737 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13738 op0 = copy_to_mode_reg (mode0, op0);
13740 pat = GEN_FCN (icode) (target, op0, op1);
13747 /* Subroutine of ix86_expand_builtin to take care of stores. */
13750 ix86_expand_store_builtin (icode, arglist)
13751 enum insn_code icode;
13755 tree arg0 = TREE_VALUE (arglist);
13756 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13757 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13758 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13759 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13760 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13762 if (VECTOR_MODE_P (mode1))
13763 op1 = safe_vector_operand (op1, mode1);
13765 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13766 op1 = copy_to_mode_reg (mode1, op1);
13768 pat = GEN_FCN (icode) (op0, op1);
13774 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
13777 ix86_expand_unop_builtin (icode, arglist, target, do_load)
13778 enum insn_code icode;
13784 tree arg0 = TREE_VALUE (arglist);
13785 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13786 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13787 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13790 || GET_MODE (target) != tmode
13791 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13792 target = gen_reg_rtx (tmode);
13794 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13797 if (VECTOR_MODE_P (mode0))
13798 op0 = safe_vector_operand (op0, mode0);
13800 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13801 op0 = copy_to_mode_reg (mode0, op0);
13804 pat = GEN_FCN (icode) (target, op0);
13811 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13812 sqrtss, rsqrtss, rcpss. */
13815 ix86_expand_unop1_builtin (icode, arglist, target)
13816 enum insn_code icode;
13821 tree arg0 = TREE_VALUE (arglist);
13822 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13823 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13824 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13827 || GET_MODE (target) != tmode
13828 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13829 target = gen_reg_rtx (tmode);
13831 if (VECTOR_MODE_P (mode0))
13832 op0 = safe_vector_operand (op0, mode0);
13834 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13835 op0 = copy_to_mode_reg (mode0, op0);
13838 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13839 op1 = copy_to_mode_reg (mode0, op1);
13841 pat = GEN_FCN (icode) (target, op0, op1);
13848 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13851 ix86_expand_sse_compare (d, arglist, target)
13852 const struct builtin_description *d;
13857 tree arg0 = TREE_VALUE (arglist);
13858 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13859 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13860 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13862 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13863 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13864 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13865 enum rtx_code comparison = d->comparison;
13867 if (VECTOR_MODE_P (mode0))
13868 op0 = safe_vector_operand (op0, mode0);
13869 if (VECTOR_MODE_P (mode1))
13870 op1 = safe_vector_operand (op1, mode1);
13872 /* Swap operands if we have a comparison that isn't available in
13876 rtx tmp = gen_reg_rtx (mode1);
13877 emit_move_insn (tmp, op1);
13883 || GET_MODE (target) != tmode
13884 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13885 target = gen_reg_rtx (tmode);
13887 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13888 op0 = copy_to_mode_reg (mode0, op0);
13889 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13890 op1 = copy_to_mode_reg (mode1, op1);
13892 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13893 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13900 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
13903 ix86_expand_sse_comi (d, arglist, target)
13904 const struct builtin_description *d;
13909 tree arg0 = TREE_VALUE (arglist);
13910 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13911 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13912 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13914 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13915 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13916 enum rtx_code comparison = d->comparison;
13918 if (VECTOR_MODE_P (mode0))
13919 op0 = safe_vector_operand (op0, mode0);
13920 if (VECTOR_MODE_P (mode1))
13921 op1 = safe_vector_operand (op1, mode1);
13923 /* Swap operands if we have a comparison that isn't available in
13932 target = gen_reg_rtx (SImode);
13933 emit_move_insn (target, const0_rtx);
13934 target = gen_rtx_SUBREG (QImode, target, 0);
13936 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13937 op0 = copy_to_mode_reg (mode0, op0);
13938 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13939 op1 = copy_to_mode_reg (mode1, op1);
13941 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13942 pat = GEN_FCN (d->icode) (op0, op1);
13946 emit_insn (gen_rtx_SET (VOIDmode,
13947 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13948 gen_rtx_fmt_ee (comparison, QImode,
13952 return SUBREG_REG (target);
13955 /* Expand an expression EXP that calls a built-in function,
13956 with result going to TARGET if that's convenient
13957 (and in mode MODE if that's convenient).
13958 SUBTARGET may be used as the target for computing one of EXP's operands.
13959 IGNORE is nonzero if the value is to be ignored. */
13962 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
13965 rtx subtarget ATTRIBUTE_UNUSED;
13966 enum machine_mode mode ATTRIBUTE_UNUSED;
13967 int ignore ATTRIBUTE_UNUSED;
13969 const struct builtin_description *d;
13971 enum insn_code icode;
13972 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13973 tree arglist = TREE_OPERAND (exp, 1);
13974 tree arg0, arg1, arg2;
13975 rtx op0, op1, op2, pat;
13976 enum machine_mode tmode, mode0, mode1, mode2;
13977 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13981 case IX86_BUILTIN_EMMS:
13982 emit_insn (gen_emms ());
13985 case IX86_BUILTIN_SFENCE:
13986 emit_insn (gen_sfence ());
13989 case IX86_BUILTIN_PEXTRW:
13990 case IX86_BUILTIN_PEXTRW128:
13991 icode = (fcode == IX86_BUILTIN_PEXTRW
13992 ? CODE_FOR_mmx_pextrw
13993 : CODE_FOR_sse2_pextrw);
13994 arg0 = TREE_VALUE (arglist);
13995 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13996 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13997 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13998 tmode = insn_data[icode].operand[0].mode;
13999 mode0 = insn_data[icode].operand[1].mode;
14000 mode1 = insn_data[icode].operand[2].mode;
14002 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14003 op0 = copy_to_mode_reg (mode0, op0);
14004 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14006 /* @@@ better error message */
14007 error ("selector must be an immediate");
14008 return gen_reg_rtx (tmode);
14011 || GET_MODE (target) != tmode
14012 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14013 target = gen_reg_rtx (tmode);
14014 pat = GEN_FCN (icode) (target, op0, op1);
14020 case IX86_BUILTIN_PINSRW:
14021 case IX86_BUILTIN_PINSRW128:
14022 icode = (fcode == IX86_BUILTIN_PINSRW
14023 ? CODE_FOR_mmx_pinsrw
14024 : CODE_FOR_sse2_pinsrw);
14025 arg0 = TREE_VALUE (arglist);
14026 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14027 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14028 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14029 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14030 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14031 tmode = insn_data[icode].operand[0].mode;
14032 mode0 = insn_data[icode].operand[1].mode;
14033 mode1 = insn_data[icode].operand[2].mode;
14034 mode2 = insn_data[icode].operand[3].mode;
14036 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14037 op0 = copy_to_mode_reg (mode0, op0);
14038 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14039 op1 = copy_to_mode_reg (mode1, op1);
14040 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14042 /* @@@ better error message */
14043 error ("selector must be an immediate");
14047 || GET_MODE (target) != tmode
14048 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14049 target = gen_reg_rtx (tmode);
14050 pat = GEN_FCN (icode) (target, op0, op1, op2);
14056 case IX86_BUILTIN_MASKMOVQ:
14057 case IX86_BUILTIN_MASKMOVDQU:
14058 icode = (fcode == IX86_BUILTIN_MASKMOVQ
14059 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
14060 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
14061 : CODE_FOR_sse2_maskmovdqu));
14062 /* Note the arg order is different from the operand order. */
14063 arg1 = TREE_VALUE (arglist);
14064 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
14065 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14066 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14067 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14068 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14069 mode0 = insn_data[icode].operand[0].mode;
14070 mode1 = insn_data[icode].operand[1].mode;
14071 mode2 = insn_data[icode].operand[2].mode;
14073 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14074 op0 = copy_to_mode_reg (mode0, op0);
14075 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14076 op1 = copy_to_mode_reg (mode1, op1);
14077 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
14078 op2 = copy_to_mode_reg (mode2, op2);
14079 pat = GEN_FCN (icode) (op0, op1, op2);
14085 case IX86_BUILTIN_SQRTSS:
14086 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
14087 case IX86_BUILTIN_RSQRTSS:
14088 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
14089 case IX86_BUILTIN_RCPSS:
14090 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
14092 case IX86_BUILTIN_LOADAPS:
14093 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
14095 case IX86_BUILTIN_LOADUPS:
14096 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
14098 case IX86_BUILTIN_STOREAPS:
14099 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
14101 case IX86_BUILTIN_STOREUPS:
14102 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
14104 case IX86_BUILTIN_LOADSS:
14105 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
14107 case IX86_BUILTIN_STORESS:
14108 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
14110 case IX86_BUILTIN_LOADHPS:
14111 case IX86_BUILTIN_LOADLPS:
14112 case IX86_BUILTIN_LOADHPD:
14113 case IX86_BUILTIN_LOADLPD:
14114 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
14115 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
14116 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
14117 : CODE_FOR_sse2_movlpd);
14118 arg0 = TREE_VALUE (arglist);
14119 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14120 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14121 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14122 tmode = insn_data[icode].operand[0].mode;
14123 mode0 = insn_data[icode].operand[1].mode;
14124 mode1 = insn_data[icode].operand[2].mode;
14126 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14127 op0 = copy_to_mode_reg (mode0, op0);
14128 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
14130 || GET_MODE (target) != tmode
14131 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14132 target = gen_reg_rtx (tmode);
14133 pat = GEN_FCN (icode) (target, op0, op1);
14139 case IX86_BUILTIN_STOREHPS:
14140 case IX86_BUILTIN_STORELPS:
14141 case IX86_BUILTIN_STOREHPD:
14142 case IX86_BUILTIN_STORELPD:
14143 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
14144 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
14145 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
14146 : CODE_FOR_sse2_movlpd);
14147 arg0 = TREE_VALUE (arglist);
14148 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14149 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14150 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14151 mode0 = insn_data[icode].operand[1].mode;
14152 mode1 = insn_data[icode].operand[2].mode;
14154 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14155 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14156 op1 = copy_to_mode_reg (mode1, op1);
14158 pat = GEN_FCN (icode) (op0, op0, op1);
14164 case IX86_BUILTIN_MOVNTPS:
14165 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
14166 case IX86_BUILTIN_MOVNTQ:
14167 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
14169 case IX86_BUILTIN_LDMXCSR:
14170 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14171 target = assign_386_stack_local (SImode, 0);
14172 emit_move_insn (target, op0);
14173 emit_insn (gen_ldmxcsr (target));
14176 case IX86_BUILTIN_STMXCSR:
14177 target = assign_386_stack_local (SImode, 0);
14178 emit_insn (gen_stmxcsr (target));
14179 return copy_to_mode_reg (SImode, target);
14181 case IX86_BUILTIN_SHUFPS:
14182 case IX86_BUILTIN_SHUFPD:
14183 icode = (fcode == IX86_BUILTIN_SHUFPS
14184 ? CODE_FOR_sse_shufps
14185 : CODE_FOR_sse2_shufpd);
14186 arg0 = TREE_VALUE (arglist);
14187 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14188 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14189 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14190 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14191 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14192 tmode = insn_data[icode].operand[0].mode;
14193 mode0 = insn_data[icode].operand[1].mode;
14194 mode1 = insn_data[icode].operand[2].mode;
14195 mode2 = insn_data[icode].operand[3].mode;
14197 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14198 op0 = copy_to_mode_reg (mode0, op0);
14199 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14200 op1 = copy_to_mode_reg (mode1, op1);
14201 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14203 /* @@@ better error message */
14204 error ("mask must be an immediate");
14205 return gen_reg_rtx (tmode);
14208 || GET_MODE (target) != tmode
14209 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14210 target = gen_reg_rtx (tmode);
14211 pat = GEN_FCN (icode) (target, op0, op1, op2);
14217 case IX86_BUILTIN_PSHUFW:
14218 case IX86_BUILTIN_PSHUFD:
14219 case IX86_BUILTIN_PSHUFHW:
14220 case IX86_BUILTIN_PSHUFLW:
14221 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14222 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14223 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14224 : CODE_FOR_mmx_pshufw);
14225 arg0 = TREE_VALUE (arglist);
14226 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14227 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14228 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14229 tmode = insn_data[icode].operand[0].mode;
14230 mode1 = insn_data[icode].operand[1].mode;
14231 mode2 = insn_data[icode].operand[2].mode;
14233 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14234 op0 = copy_to_mode_reg (mode1, op0);
14235 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14237 /* @@@ better error message */
14238 error ("mask must be an immediate");
14242 || GET_MODE (target) != tmode
14243 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14244 target = gen_reg_rtx (tmode);
14245 pat = GEN_FCN (icode) (target, op0, op1);
14251 case IX86_BUILTIN_PSLLDQI128:
14252 case IX86_BUILTIN_PSRLDQI128:
14253 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14254 : CODE_FOR_sse2_lshrti3);
14255 arg0 = TREE_VALUE (arglist);
14256 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14257 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14258 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14259 tmode = insn_data[icode].operand[0].mode;
14260 mode1 = insn_data[icode].operand[1].mode;
14261 mode2 = insn_data[icode].operand[2].mode;
14263 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14265 op0 = copy_to_reg (op0);
14266 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14268 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14270 error ("shift must be an immediate");
14273 target = gen_reg_rtx (V2DImode);
14274 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14280 case IX86_BUILTIN_FEMMS:
14281 emit_insn (gen_femms ());
14284 case IX86_BUILTIN_PAVGUSB:
14285 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
14287 case IX86_BUILTIN_PF2ID:
14288 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
14290 case IX86_BUILTIN_PFACC:
14291 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
14293 case IX86_BUILTIN_PFADD:
14294 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
14296 case IX86_BUILTIN_PFCMPEQ:
14297 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
14299 case IX86_BUILTIN_PFCMPGE:
14300 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
14302 case IX86_BUILTIN_PFCMPGT:
14303 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
14305 case IX86_BUILTIN_PFMAX:
14306 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
14308 case IX86_BUILTIN_PFMIN:
14309 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
14311 case IX86_BUILTIN_PFMUL:
14312 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
14314 case IX86_BUILTIN_PFRCP:
14315 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
14317 case IX86_BUILTIN_PFRCPIT1:
14318 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
14320 case IX86_BUILTIN_PFRCPIT2:
14321 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
14323 case IX86_BUILTIN_PFRSQIT1:
14324 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
14326 case IX86_BUILTIN_PFRSQRT:
14327 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
14329 case IX86_BUILTIN_PFSUB:
14330 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
14332 case IX86_BUILTIN_PFSUBR:
14333 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
14335 case IX86_BUILTIN_PI2FD:
14336 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
14338 case IX86_BUILTIN_PMULHRW:
14339 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
14341 case IX86_BUILTIN_PF2IW:
14342 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
14344 case IX86_BUILTIN_PFNACC:
14345 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
14347 case IX86_BUILTIN_PFPNACC:
14348 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
14350 case IX86_BUILTIN_PI2FW:
14351 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
14353 case IX86_BUILTIN_PSWAPDSI:
14354 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
14356 case IX86_BUILTIN_PSWAPDSF:
14357 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
14359 case IX86_BUILTIN_SSE_ZERO:
14360 target = gen_reg_rtx (V4SFmode);
14361 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
14364 case IX86_BUILTIN_MMX_ZERO:
14365 target = gen_reg_rtx (DImode);
14366 emit_insn (gen_mmx_clrdi (target));
14369 case IX86_BUILTIN_CLRTI:
14370 target = gen_reg_rtx (V2DImode);
14371 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
14375 case IX86_BUILTIN_SQRTSD:
14376 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14377 case IX86_BUILTIN_LOADAPD:
14378 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14379 case IX86_BUILTIN_LOADUPD:
14380 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14382 case IX86_BUILTIN_STOREAPD:
14383 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14384 case IX86_BUILTIN_STOREUPD:
14385 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14387 case IX86_BUILTIN_LOADSD:
14388 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14390 case IX86_BUILTIN_STORESD:
14391 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
14393 case IX86_BUILTIN_SETPD1:
14394 target = assign_386_stack_local (DFmode, 0);
14395 arg0 = TREE_VALUE (arglist);
14396 emit_move_insn (adjust_address (target, DFmode, 0),
14397 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14398 op0 = gen_reg_rtx (V2DFmode);
14399 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
14400 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
14403 case IX86_BUILTIN_SETPD:
14404 target = assign_386_stack_local (V2DFmode, 0);
14405 arg0 = TREE_VALUE (arglist);
14406 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14407 emit_move_insn (adjust_address (target, DFmode, 0),
14408 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14409 emit_move_insn (adjust_address (target, DFmode, 8),
14410 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14411 op0 = gen_reg_rtx (V2DFmode);
14412 emit_insn (gen_sse2_movapd (op0, target));
14415 case IX86_BUILTIN_LOADRPD:
14416 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14417 gen_reg_rtx (V2DFmode), 1);
14418 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
14421 case IX86_BUILTIN_LOADPD1:
14422 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14423 gen_reg_rtx (V2DFmode), 1);
14424 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14427 case IX86_BUILTIN_STOREPD1:
14428 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14429 case IX86_BUILTIN_STORERPD:
14430 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14432 case IX86_BUILTIN_CLRPD:
14433 target = gen_reg_rtx (V2DFmode);
14434 emit_insn (gen_sse_clrv2df (target));
14437 case IX86_BUILTIN_MFENCE:
14438 emit_insn (gen_sse2_mfence ());
14440 case IX86_BUILTIN_LFENCE:
14441 emit_insn (gen_sse2_lfence ());
14444 case IX86_BUILTIN_CLFLUSH:
14445 arg0 = TREE_VALUE (arglist);
14446 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14447 icode = CODE_FOR_sse2_clflush;
14448 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14449 op0 = copy_to_mode_reg (Pmode, op0);
14451 emit_insn (gen_sse2_clflush (op0));
14454 case IX86_BUILTIN_MOVNTPD:
14455 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14456 case IX86_BUILTIN_MOVNTDQ:
14457 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14458 case IX86_BUILTIN_MOVNTI:
14459 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14461 case IX86_BUILTIN_LOADDQA:
14462 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14463 case IX86_BUILTIN_LOADDQU:
14464 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14465 case IX86_BUILTIN_LOADD:
14466 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14468 case IX86_BUILTIN_STOREDQA:
14469 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14470 case IX86_BUILTIN_STOREDQU:
14471 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14472 case IX86_BUILTIN_STORED:
14473 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
14479 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14480 if (d->code == fcode)
14482 /* Compares are treated specially. */
14483 if (d->icode == CODE_FOR_maskcmpv4sf3
14484 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14485 || d->icode == CODE_FOR_maskncmpv4sf3
14486 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14487 || d->icode == CODE_FOR_maskcmpv2df3
14488 || d->icode == CODE_FOR_vmmaskcmpv2df3
14489 || d->icode == CODE_FOR_maskncmpv2df3
14490 || d->icode == CODE_FOR_vmmaskncmpv2df3)
14491 return ix86_expand_sse_compare (d, arglist, target);
14493 return ix86_expand_binop_builtin (d->icode, arglist, target);
14496 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14497 if (d->code == fcode)
14498 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14500 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14501 if (d->code == fcode)
14502 return ix86_expand_sse_comi (d, arglist, target);
14504 /* @@@ Should really do something sensible here. */
14508 /* Store OPERAND to the memory after reload is completed. This means
14509 that we can't easily use assign_stack_local. */
14511 ix86_force_to_memory (mode, operand)
14512 enum machine_mode mode;
14516 if (!reload_completed)
14518 if (TARGET_64BIT && TARGET_RED_ZONE)
14520 result = gen_rtx_MEM (mode,
14521 gen_rtx_PLUS (Pmode,
14523 GEN_INT (-RED_ZONE_SIZE)));
14524 emit_move_insn (result, operand);
14526 else if (TARGET_64BIT && !TARGET_RED_ZONE)
14532 operand = gen_lowpart (DImode, operand);
14536 gen_rtx_SET (VOIDmode,
14537 gen_rtx_MEM (DImode,
14538 gen_rtx_PRE_DEC (DImode,
14539 stack_pointer_rtx)),
14545 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14554 split_di (&operand, 1, operands, operands + 1);
14556 gen_rtx_SET (VOIDmode,
14557 gen_rtx_MEM (SImode,
14558 gen_rtx_PRE_DEC (Pmode,
14559 stack_pointer_rtx)),
14562 gen_rtx_SET (VOIDmode,
14563 gen_rtx_MEM (SImode,
14564 gen_rtx_PRE_DEC (Pmode,
14565 stack_pointer_rtx)),
14570 /* It is better to store HImodes as SImodes. */
14571 if (!TARGET_PARTIAL_REG_STALL)
14572 operand = gen_lowpart (SImode, operand);
14576 gen_rtx_SET (VOIDmode,
14577 gen_rtx_MEM (GET_MODE (operand),
14578 gen_rtx_PRE_DEC (SImode,
14579 stack_pointer_rtx)),
14585 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14590 /* Free operand from the memory. */
14592 ix86_free_from_memory (mode)
14593 enum machine_mode mode;
14595 if (!TARGET_64BIT || !TARGET_RED_ZONE)
14599 if (mode == DImode || TARGET_64BIT)
14601 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14605 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14606 to pop or add instruction if registers are available. */
14607 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14608 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14613 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14614 QImode must go into class Q_REGS.
14615 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
14616 movdf to do mem-to-mem moves through integer regs. */
14618 ix86_preferred_reload_class (x, class)
14620 enum reg_class class;
14622 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14624 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14626 /* SSE can't load any constant directly yet. */
14627 if (SSE_CLASS_P (class))
14629 /* Floats can load 0 and 1. */
14630 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14632 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14633 if (MAYBE_SSE_CLASS_P (class))
14634 return (reg_class_subset_p (class, GENERAL_REGS)
14635 ? GENERAL_REGS : FLOAT_REGS);
14639 /* General regs can load everything. */
14640 if (reg_class_subset_p (class, GENERAL_REGS))
14641 return GENERAL_REGS;
14642 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14643 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14646 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14648 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14653 /* If we are copying between general and FP registers, we need a memory
14654 location. The same is true for SSE and MMX registers.
14656 The macro can't work reliably when one of the CLASSES is class containing
14657 registers from multiple units (SSE, MMX, integer). We avoid this by never
14658 combining those units in single alternative in the machine description.
14659 Ensure that this constraint holds to avoid unexpected surprises.
14661 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14662 enforce these sanity checks. */
14664 ix86_secondary_memory_needed (class1, class2, mode, strict)
14665 enum reg_class class1, class2;
14666 enum machine_mode mode;
14669 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14670 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14671 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14672 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14673 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14674 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14681 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14682 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14683 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14684 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14685 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
14687 /* Return the cost of moving data from a register in class CLASS1 to
14688 one in class CLASS2.
14690 It is not required that the cost always equal 2 when FROM is the same as TO;
14691 on some machines it is expensive to move between registers if they are not
14692 general registers. */
14694 ix86_register_move_cost (mode, class1, class2)
14695 enum machine_mode mode;
14696 enum reg_class class1, class2;
14698 /* In case we require secondary memory, compute cost of the store followed
14699 by load. In order to avoid bad register allocation choices, we need
14700 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14702 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14706 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14707 MEMORY_MOVE_COST (mode, class1, 1));
14708 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14709 MEMORY_MOVE_COST (mode, class2, 1));
14711 /* In case of copying from general_purpose_register we may emit multiple
14712 stores followed by single load causing memory size mismatch stall.
14713 Count this as arbitrarily high cost of 20. */
14714 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14717 /* In the case of FP/MMX moves, the registers actually overlap, and we
14718 have to switch modes in order to treat them differently. */
14719 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14720 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14726 /* Moves between SSE/MMX and integer unit are expensive. */
14727 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14728 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14729 return ix86_cost->mmxsse_to_integer;
14730 if (MAYBE_FLOAT_CLASS_P (class1))
14731 return ix86_cost->fp_move;
14732 if (MAYBE_SSE_CLASS_P (class1))
14733 return ix86_cost->sse_move;
14734 if (MAYBE_MMX_CLASS_P (class1))
14735 return ix86_cost->mmx_move;
14739 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14741 ix86_hard_regno_mode_ok (regno, mode)
14743 enum machine_mode mode;
14745 /* Flags and only flags can only hold CCmode values. */
14746 if (CC_REGNO_P (regno))
14747 return GET_MODE_CLASS (mode) == MODE_CC;
14748 if (GET_MODE_CLASS (mode) == MODE_CC
14749 || GET_MODE_CLASS (mode) == MODE_RANDOM
14750 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14752 if (FP_REGNO_P (regno))
14753 return VALID_FP_MODE_P (mode);
14754 if (SSE_REGNO_P (regno))
14755 return VALID_SSE_REG_MODE (mode);
14756 if (MMX_REGNO_P (regno))
14757 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
14758 /* We handle both integer and floats in the general purpose registers.
14759 In future we should be able to handle vector modes as well. */
14760 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14762 /* Take care for QImode values - they can be in non-QI regs, but then
14763 they do cause partial register stalls. */
14764 if (regno < 4 || mode != QImode || TARGET_64BIT)
14766 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14769 /* Return the cost of moving data of mode M between a
14770 register and memory. A value of 2 is the default; this cost is
14771 relative to those in `REGISTER_MOVE_COST'.
14773 If moving between registers and memory is more expensive than
14774 between two registers, you should define this macro to express the
14777 Model also increased moving costs of QImode registers in non
14781 ix86_memory_move_cost (mode, class, in)
14782 enum machine_mode mode;
14783 enum reg_class class;
14786 if (FLOAT_CLASS_P (class))
14804 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14806 if (SSE_CLASS_P (class))
14809 switch (GET_MODE_SIZE (mode))
14823 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14825 if (MMX_CLASS_P (class))
14828 switch (GET_MODE_SIZE (mode))
14839 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14841 switch (GET_MODE_SIZE (mode))
14845 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14846 : ix86_cost->movzbl_load);
14848 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14849 : ix86_cost->int_store[0] + 4);
14852 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14854 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14855 if (mode == TFmode)
14857 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14858 * ((int) GET_MODE_SIZE (mode)
14859 + UNITS_PER_WORD -1 ) / UNITS_PER_WORD);
14863 /* Compute a (partial) cost for rtx X. Return true if the complete
14864 cost has been computed, and false if subexpressions should be
14865 scanned. In either case, *TOTAL contains the cost result. */
14868 ix86_rtx_costs (x, code, outer_code, total)
14870 int code, outer_code;
14873 enum machine_mode mode = GET_MODE (x);
14881 if (TARGET_64BIT && !x86_64_sign_extended_value (x))
14883 else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
14885 else if (flag_pic && SYMBOLIC_CONST (x))
14892 if (mode == VOIDmode)
14895 switch (standard_80387_constant_p (x))
14900 default: /* Other constants */
14905 /* Start with (MEM (SYMBOL_REF)), since that's where
14906 it'll probably end up. Add a penalty for size. */
14907 *total = (COSTS_N_INSNS (1)
14909 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
14915 /* The zero extensions is often completely free on x86_64, so make
14916 it as cheap as possible. */
14917 if (TARGET_64BIT && mode == DImode
14918 && GET_MODE (XEXP (x, 0)) == SImode)
14920 else if (TARGET_ZERO_EXTEND_WITH_AND)
14921 *total = COSTS_N_INSNS (ix86_cost->add);
14923 *total = COSTS_N_INSNS (ix86_cost->movzx);
14927 *total = COSTS_N_INSNS (ix86_cost->movsx);
14931 if (GET_CODE (XEXP (x, 1)) == CONST_INT
14932 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
14934 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14937 *total = COSTS_N_INSNS (ix86_cost->add);
14940 if ((value == 2 || value == 3)
14941 && !TARGET_DECOMPOSE_LEA
14942 && ix86_cost->lea <= ix86_cost->shift_const)
14944 *total = COSTS_N_INSNS (ix86_cost->lea);
14954 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
14956 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14958 if (INTVAL (XEXP (x, 1)) > 32)
14959 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
14961 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
14965 if (GET_CODE (XEXP (x, 1)) == AND)
14966 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
14968 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
14973 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14974 *total = COSTS_N_INSNS (ix86_cost->shift_const);
14976 *total = COSTS_N_INSNS (ix86_cost->shift_var);
14981 if (FLOAT_MODE_P (mode))
14982 *total = COSTS_N_INSNS (ix86_cost->fmul);
14983 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14985 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14988 for (nbits = 0; value != 0; value >>= 1)
14991 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
14992 + nbits * ix86_cost->mult_bit);
14996 /* This is arbitrary */
14997 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
14998 + 7 * ix86_cost->mult_bit);
15006 if (FLOAT_MODE_P (mode))
15007 *total = COSTS_N_INSNS (ix86_cost->fdiv);
15009 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
15013 if (FLOAT_MODE_P (mode))
15014 *total = COSTS_N_INSNS (ix86_cost->fadd);
15015 else if (!TARGET_DECOMPOSE_LEA
15016 && GET_MODE_CLASS (mode) == MODE_INT
15017 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
15019 if (GET_CODE (XEXP (x, 0)) == PLUS
15020 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15021 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
15022 && CONSTANT_P (XEXP (x, 1)))
15024 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
15025 if (val == 2 || val == 4 || val == 8)
15027 *total = COSTS_N_INSNS (ix86_cost->lea);
15028 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15029 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
15031 *total += rtx_cost (XEXP (x, 1), outer_code);
15035 else if (GET_CODE (XEXP (x, 0)) == MULT
15036 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
15038 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
15039 if (val == 2 || val == 4 || val == 8)
15041 *total = COSTS_N_INSNS (ix86_cost->lea);
15042 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15043 *total += rtx_cost (XEXP (x, 1), outer_code);
15047 else if (GET_CODE (XEXP (x, 0)) == PLUS)
15049 *total = COSTS_N_INSNS (ix86_cost->lea);
15050 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15051 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15052 *total += rtx_cost (XEXP (x, 1), outer_code);
15059 if (FLOAT_MODE_P (mode))
15061 *total = COSTS_N_INSNS (ix86_cost->fadd);
15069 if (!TARGET_64BIT && mode == DImode)
15071 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
15072 + (rtx_cost (XEXP (x, 0), outer_code)
15073 << (GET_MODE (XEXP (x, 0)) != DImode))
15074 + (rtx_cost (XEXP (x, 1), outer_code)
15075 << (GET_MODE (XEXP (x, 1)) != DImode)));
15081 if (FLOAT_MODE_P (mode))
15083 *total = COSTS_N_INSNS (ix86_cost->fchs);
15089 if (!TARGET_64BIT && mode == DImode)
15090 *total = COSTS_N_INSNS (ix86_cost->add * 2);
15092 *total = COSTS_N_INSNS (ix86_cost->add);
15096 if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
15101 if (FLOAT_MODE_P (mode))
15102 *total = COSTS_N_INSNS (ix86_cost->fabs);
15106 if (FLOAT_MODE_P (mode))
15107 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
15115 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
15117 ix86_svr3_asm_out_constructor (symbol, priority)
15119 int priority ATTRIBUTE_UNUSED;
15122 fputs ("\tpushl $", asm_out_file);
15123 assemble_name (asm_out_file, XSTR (symbol, 0));
15124 fputc ('\n', asm_out_file);
15130 static int current_machopic_label_num;
15132 /* Given a symbol name and its associated stub, write out the
15133 definition of the stub. */
15136 machopic_output_stub (file, symb, stub)
15138 const char *symb, *stub;
15140 unsigned int length;
15141 char *binder_name, *symbol_name, lazy_ptr_name[32];
15142 int label = ++current_machopic_label_num;
15144 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
15145 symb = (*targetm.strip_name_encoding) (symb);
15147 length = strlen (stub);
15148 binder_name = alloca (length + 32);
15149 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
15151 length = strlen (symb);
15152 symbol_name = alloca (length + 32);
15153 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
15155 sprintf (lazy_ptr_name, "L%d$lz", label);
15158 machopic_picsymbol_stub_section ();
15160 machopic_symbol_stub_section ();
15162 fprintf (file, "%s:\n", stub);
15163 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15167 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
15168 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
15169 fprintf (file, "\tjmp %%edx\n");
15172 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
15174 fprintf (file, "%s:\n", binder_name);
15178 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
15179 fprintf (file, "\tpushl %%eax\n");
15182 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
15184 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
15186 machopic_lazy_symbol_ptr_section ();
15187 fprintf (file, "%s:\n", lazy_ptr_name);
15188 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15189 fprintf (file, "\t.long %s\n", binder_name);
15191 #endif /* TARGET_MACHO */
15193 /* Order the registers for register allocator. */
15196 x86_order_regs_for_local_alloc ()
15201 /* First allocate the local general purpose registers. */
15202 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15203 if (GENERAL_REGNO_P (i) && call_used_regs[i])
15204 reg_alloc_order [pos++] = i;
15206 /* Global general purpose registers. */
15207 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15208 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
15209 reg_alloc_order [pos++] = i;
15211 /* x87 registers come first in case we are doing FP math
15213 if (!TARGET_SSE_MATH)
15214 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15215 reg_alloc_order [pos++] = i;
15217 /* SSE registers. */
15218 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
15219 reg_alloc_order [pos++] = i;
15220 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
15221 reg_alloc_order [pos++] = i;
15223 /* x87 registers. */
15224 if (TARGET_SSE_MATH)
15225 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15226 reg_alloc_order [pos++] = i;
15228 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
15229 reg_alloc_order [pos++] = i;
15231 /* Initialize the rest of array as we do not allocate some registers
15233 while (pos < FIRST_PSEUDO_REGISTER)
15234 reg_alloc_order [pos++] = 0;
15237 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15238 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
15241 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15242 struct attribute_spec.handler. */
15244 ix86_handle_struct_attribute (node, name, args, flags, no_add_attrs)
15247 tree args ATTRIBUTE_UNUSED;
15248 int flags ATTRIBUTE_UNUSED;
15249 bool *no_add_attrs;
15252 if (DECL_P (*node))
15254 if (TREE_CODE (*node) == TYPE_DECL)
15255 type = &TREE_TYPE (*node);
15260 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15261 || TREE_CODE (*type) == UNION_TYPE)))
15263 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
15264 *no_add_attrs = true;
15267 else if ((is_attribute_p ("ms_struct", name)
15268 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15269 || ((is_attribute_p ("gcc_struct", name)
15270 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15272 warning ("`%s' incompatible attribute ignored",
15273 IDENTIFIER_POINTER (name));
15274 *no_add_attrs = true;
15281 ix86_ms_bitfield_layout_p (record_type)
15284 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
15285 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
15286 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
15289 /* Returns an expression indicating where the this parameter is
15290 located on entry to the FUNCTION. */
15293 x86_this_parameter (function)
15296 tree type = TREE_TYPE (function);
15300 int n = aggregate_value_p (TREE_TYPE (type)) != 0;
15301 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
15304 if (ix86_fntype_regparm (type) > 0)
15308 parm = TYPE_ARG_TYPES (type);
15309 /* Figure out whether or not the function has a variable number of
15311 for (; parm; parm = TREE_CHAIN (parm))
15312 if (TREE_VALUE (parm) == void_type_node)
15314 /* If not, the this parameter is in %eax. */
15316 return gen_rtx_REG (SImode, 0);
15319 if (aggregate_value_p (TREE_TYPE (type)))
15320 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
15322 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
15325 /* Determine whether x86_output_mi_thunk can succeed. */
15328 x86_can_output_mi_thunk (thunk, delta, vcall_offset, function)
15329 tree thunk ATTRIBUTE_UNUSED;
15330 HOST_WIDE_INT delta ATTRIBUTE_UNUSED;
15331 HOST_WIDE_INT vcall_offset;
15334 /* 64-bit can handle anything. */
15338 /* For 32-bit, everything's fine if we have one free register. */
15339 if (ix86_fntype_regparm (TREE_TYPE (function)) < 3)
15342 /* Need a free register for vcall_offset. */
15346 /* Need a free register for GOT references. */
15347 if (flag_pic && !(*targetm.binds_local_p) (function))
15350 /* Otherwise ok. */
15354 /* Output the assembler code for a thunk function. THUNK_DECL is the
15355 declaration for the thunk function itself, FUNCTION is the decl for
15356 the target function. DELTA is an immediate constant offset to be
15357 added to THIS. If VCALL_OFFSET is nonzero, the word at
15358 *(*this + vcall_offset) should be added to THIS. */
15361 x86_output_mi_thunk (file, thunk, delta, vcall_offset, function)
15362 FILE *file ATTRIBUTE_UNUSED;
15363 tree thunk ATTRIBUTE_UNUSED;
15364 HOST_WIDE_INT delta;
15365 HOST_WIDE_INT vcall_offset;
15369 rtx this = x86_this_parameter (function);
15372 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15373 pull it in now and let DELTA benefit. */
15376 else if (vcall_offset)
15378 /* Put the this parameter into %eax. */
15380 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15381 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15384 this_reg = NULL_RTX;
15386 /* Adjust the this parameter by a fixed constant. */
15389 xops[0] = GEN_INT (delta);
15390 xops[1] = this_reg ? this_reg : this;
15393 if (!x86_64_general_operand (xops[0], DImode))
15395 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15397 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15401 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15404 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15407 /* Adjust the this parameter by a value stored in the vtable. */
15411 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15413 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15415 xops[0] = gen_rtx_MEM (Pmode, this_reg);
15418 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15420 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15422 /* Adjust the this parameter. */
15423 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
15424 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15426 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15427 xops[0] = GEN_INT (vcall_offset);
15429 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15430 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
15432 xops[1] = this_reg;
15434 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15436 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15439 /* If necessary, drop THIS back to its stack slot. */
15440 if (this_reg && this_reg != this)
15442 xops[0] = this_reg;
15444 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15447 xops[0] = DECL_RTL (function);
15450 if (!flag_pic || (*targetm.binds_local_p) (function))
15451 output_asm_insn ("jmp\t%P0", xops);
15454 tmp = XEXP (xops[0], 0);
15455 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, tmp), UNSPEC_GOTPCREL);
15456 tmp = gen_rtx_CONST (Pmode, tmp);
15457 tmp = gen_rtx_MEM (QImode, tmp);
15459 output_asm_insn ("jmp\t%A0", xops);
15464 if (!flag_pic || (*targetm.binds_local_p) (function))
15465 output_asm_insn ("jmp\t%P0", xops);
15470 char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
15471 tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
15472 tmp = gen_rtx_MEM (QImode, tmp);
15474 output_asm_insn ("jmp\t%0", xops);
15477 #endif /* TARGET_MACHO */
15479 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15480 output_set_got (tmp);
15483 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15484 output_asm_insn ("jmp\t{*}%1", xops);
15490 x86_field_alignment (field, computed)
15494 enum machine_mode mode;
15495 tree type = TREE_TYPE (field);
15497 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
15499 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15500 ? get_inner_array_type (type) : type);
15501 if (mode == DFmode || mode == DCmode
15502 || GET_MODE_CLASS (mode) == MODE_INT
15503 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
15504 return MIN (32, computed);
15508 /* Output assembler code to FILE to increment profiler label # LABELNO
15509 for profiling a function entry. */
15511 x86_function_profiler (file, labelno)
15513 int labelno ATTRIBUTE_UNUSED;
15518 #ifndef NO_PROFILE_COUNTERS
15519 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
15521 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
15525 #ifndef NO_PROFILE_COUNTERS
15526 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15528 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15532 #ifndef NO_PROFILE_COUNTERS
15533 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15534 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15536 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
15540 #ifndef NO_PROFILE_COUNTERS
15541 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
15542 PROFILE_COUNT_REGISTER);
15544 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15548 /* Implement machine specific optimizations.
15549 At the moment we implement single transformation: AMD Athlon works faster
15550 when RET is not destination of conditional jump or directly preceded
15551 by other jump instruction. We avoid the penalty by inserting NOP just
15552 before the RET instructions in such cases. */
15554 x86_machine_dependent_reorg (first)
15555 rtx first ATTRIBUTE_UNUSED;
15559 if (!TARGET_ATHLON_K8 || !optimize || optimize_size)
15561 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
15563 basic_block bb = e->src;
15566 bool insert = false;
15568 if (!returnjump_p (ret) || !maybe_hot_bb_p (bb))
15570 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15571 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
15573 if (prev && GET_CODE (prev) == CODE_LABEL)
15576 for (e = bb->pred; e; e = e->pred_next)
15577 if (EDGE_FREQUENCY (e) && e->src->index >= 0
15578 && !(e->flags & EDGE_FALLTHRU))
15583 prev = prev_active_insn (ret);
15584 if (prev && GET_CODE (prev) == JUMP_INSN
15585 && any_condjump_p (prev))
15587 /* Empty functions get branch misspredict even when the jump destination
15588 is not visible to us. */
15589 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
15593 emit_insn_before (gen_nop (), ret);
15597 /* Return nonzero when QImode register that must be represented via REX prefix
15600 x86_extended_QIreg_mentioned_p (insn)
15604 extract_insn_cached (insn);
15605 for (i = 0; i < recog_data.n_operands; i++)
15606 if (REG_P (recog_data.operand[i])
15607 && REGNO (recog_data.operand[i]) >= 4)
15612 /* Return nonzero when P points to register encoded via REX prefix.
15613 Called via for_each_rtx. */
15615 extended_reg_mentioned_1 (p, data)
15617 void *data ATTRIBUTE_UNUSED;
15619 unsigned int regno;
15622 regno = REGNO (*p);
15623 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15626 /* Return true when INSN mentions register that must be encoded using REX
15629 x86_extended_reg_mentioned_p (insn)
15632 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15635 /* Generate an unsigned DImode to FP conversion. This is the same code
15636 optabs would emit if we didn't have TFmode patterns. */
15639 x86_emit_floatuns (operands)
15642 rtx neglab, donelab, i0, i1, f0, in, out;
15643 enum machine_mode mode;
15646 in = force_reg (DImode, operands[1]);
15647 mode = GET_MODE (out);
15648 neglab = gen_label_rtx ();
15649 donelab = gen_label_rtx ();
15650 i1 = gen_reg_rtx (Pmode);
15651 f0 = gen_reg_rtx (mode);
15653 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
15655 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15656 emit_jump_insn (gen_jump (donelab));
15659 emit_label (neglab);
15661 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15662 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15663 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15664 expand_float (f0, i0, 0);
15665 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15667 emit_label (donelab);
15670 /* Return if we do not know how to pass TYPE solely in registers. */
15672 ix86_must_pass_in_stack (mode, type)
15673 enum machine_mode mode;
15676 if (default_must_pass_in_stack (mode, type))
15678 return (!TARGET_64BIT && type && mode == TImode);
15681 #include "gt-i386.h"