1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-attr.h"
43 #include "basic-block.h"
46 #include "target-def.h"
47 #include "langhooks.h"
50 #ifndef CHECK_STACK_LIMIT
51 #define CHECK_STACK_LIMIT (-1)
54 /* Return index of given mode in mult and division cost tables. */
55 #define MODE_INDEX(mode) \
56 ((mode) == QImode ? 0 \
57 : (mode) == HImode ? 1 \
58 : (mode) == SImode ? 2 \
59 : (mode) == DImode ? 3 \
62 /* Processor costs (relative to an add) */
64 struct processor_costs size_cost = { /* costs for tunning for size */
65 2, /* cost of an add instruction */
66 3, /* cost of a lea instruction */
67 2, /* variable shift costs */
68 3, /* constant shift costs */
69 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
70 0, /* cost of multiply per each bit set */
71 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
72 3, /* cost of movsx */
73 3, /* cost of movzx */
76 2, /* cost for loading QImode using movzbl */
77 {2, 2, 2}, /* cost of loading integer registers
78 in QImode, HImode and SImode.
79 Relative to reg-reg move (2). */
80 {2, 2, 2}, /* cost of storing integer registers */
81 2, /* cost of reg,reg fld/fst */
82 {2, 2, 2}, /* cost of loading fp registers
83 in SFmode, DFmode and XFmode */
84 {2, 2, 2}, /* cost of loading integer registers */
85 3, /* cost of moving MMX register */
86 {3, 3}, /* cost of loading MMX registers
87 in SImode and DImode */
88 {3, 3}, /* cost of storing MMX registers
89 in SImode and DImode */
90 3, /* cost of moving SSE register */
91 {3, 3, 3}, /* cost of loading SSE registers
92 in SImode, DImode and TImode */
93 {3, 3, 3}, /* cost of storing SSE registers
94 in SImode, DImode and TImode */
95 3, /* MMX or SSE register to integer */
96 0, /* size of prefetch block */
97 0, /* number of parallel prefetches */
99 2, /* cost of FADD and FSUB insns. */
100 2, /* cost of FMUL instruction. */
101 2, /* cost of FDIV instruction. */
102 2, /* cost of FABS instruction. */
103 2, /* cost of FCHS instruction. */
104 2, /* cost of FSQRT instruction. */
107 /* Processor costs (relative to an add) */
109 struct processor_costs i386_cost = { /* 386 specific costs */
110 1, /* cost of an add instruction */
111 1, /* cost of a lea instruction */
112 3, /* variable shift costs */
113 2, /* constant shift costs */
114 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
115 1, /* cost of multiply per each bit set */
116 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
117 3, /* cost of movsx */
118 2, /* cost of movzx */
119 15, /* "large" insn */
121 4, /* cost for loading QImode using movzbl */
122 {2, 4, 2}, /* cost of loading integer registers
123 in QImode, HImode and SImode.
124 Relative to reg-reg move (2). */
125 {2, 4, 2}, /* cost of storing integer registers */
126 2, /* cost of reg,reg fld/fst */
127 {8, 8, 8}, /* cost of loading fp registers
128 in SFmode, DFmode and XFmode */
129 {8, 8, 8}, /* cost of loading integer registers */
130 2, /* cost of moving MMX register */
131 {4, 8}, /* cost of loading MMX registers
132 in SImode and DImode */
133 {4, 8}, /* cost of storing MMX registers
134 in SImode and DImode */
135 2, /* cost of moving SSE register */
136 {4, 8, 16}, /* cost of loading SSE registers
137 in SImode, DImode and TImode */
138 {4, 8, 16}, /* cost of storing SSE registers
139 in SImode, DImode and TImode */
140 3, /* MMX or SSE register to integer */
141 0, /* size of prefetch block */
142 0, /* number of parallel prefetches */
144 23, /* cost of FADD and FSUB insns. */
145 27, /* cost of FMUL instruction. */
146 88, /* cost of FDIV instruction. */
147 22, /* cost of FABS instruction. */
148 24, /* cost of FCHS instruction. */
149 122, /* cost of FSQRT instruction. */
153 struct processor_costs i486_cost = { /* 486 specific costs */
154 1, /* cost of an add instruction */
155 1, /* cost of a lea instruction */
156 3, /* variable shift costs */
157 2, /* constant shift costs */
158 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
159 1, /* cost of multiply per each bit set */
160 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
161 3, /* cost of movsx */
162 2, /* cost of movzx */
163 15, /* "large" insn */
165 4, /* cost for loading QImode using movzbl */
166 {2, 4, 2}, /* cost of loading integer registers
167 in QImode, HImode and SImode.
168 Relative to reg-reg move (2). */
169 {2, 4, 2}, /* cost of storing integer registers */
170 2, /* cost of reg,reg fld/fst */
171 {8, 8, 8}, /* cost of loading fp registers
172 in SFmode, DFmode and XFmode */
173 {8, 8, 8}, /* cost of loading integer registers */
174 2, /* cost of moving MMX register */
175 {4, 8}, /* cost of loading MMX registers
176 in SImode and DImode */
177 {4, 8}, /* cost of storing MMX registers
178 in SImode and DImode */
179 2, /* cost of moving SSE register */
180 {4, 8, 16}, /* cost of loading SSE registers
181 in SImode, DImode and TImode */
182 {4, 8, 16}, /* cost of storing SSE registers
183 in SImode, DImode and TImode */
184 3, /* MMX or SSE register to integer */
185 0, /* size of prefetch block */
186 0, /* number of parallel prefetches */
188 8, /* cost of FADD and FSUB insns. */
189 16, /* cost of FMUL instruction. */
190 73, /* cost of FDIV instruction. */
191 3, /* cost of FABS instruction. */
192 3, /* cost of FCHS instruction. */
193 83, /* cost of FSQRT instruction. */
197 struct processor_costs pentium_cost = {
198 1, /* cost of an add instruction */
199 1, /* cost of a lea instruction */
200 4, /* variable shift costs */
201 1, /* constant shift costs */
202 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
203 0, /* cost of multiply per each bit set */
204 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
205 3, /* cost of movsx */
206 2, /* cost of movzx */
207 8, /* "large" insn */
209 6, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {2, 2, 6}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {4, 4, 6}, /* cost of loading integer registers */
218 8, /* cost of moving MMX register */
219 {8, 8}, /* cost of loading MMX registers
220 in SImode and DImode */
221 {8, 8}, /* cost of storing MMX registers
222 in SImode and DImode */
223 2, /* cost of moving SSE register */
224 {4, 8, 16}, /* cost of loading SSE registers
225 in SImode, DImode and TImode */
226 {4, 8, 16}, /* cost of storing SSE registers
227 in SImode, DImode and TImode */
228 3, /* MMX or SSE register to integer */
229 0, /* size of prefetch block */
230 0, /* number of parallel prefetches */
232 3, /* cost of FADD and FSUB insns. */
233 3, /* cost of FMUL instruction. */
234 39, /* cost of FDIV instruction. */
235 1, /* cost of FABS instruction. */
236 1, /* cost of FCHS instruction. */
237 70, /* cost of FSQRT instruction. */
241 struct processor_costs pentiumpro_cost = {
242 1, /* cost of an add instruction */
243 1, /* cost of a lea instruction */
244 1, /* variable shift costs */
245 1, /* constant shift costs */
246 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
247 0, /* cost of multiply per each bit set */
248 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
249 1, /* cost of movsx */
250 1, /* cost of movzx */
251 8, /* "large" insn */
253 2, /* cost for loading QImode using movzbl */
254 {4, 4, 4}, /* cost of loading integer registers
255 in QImode, HImode and SImode.
256 Relative to reg-reg move (2). */
257 {2, 2, 2}, /* cost of storing integer registers */
258 2, /* cost of reg,reg fld/fst */
259 {2, 2, 6}, /* cost of loading fp registers
260 in SFmode, DFmode and XFmode */
261 {4, 4, 6}, /* cost of loading integer registers */
262 2, /* cost of moving MMX register */
263 {2, 2}, /* cost of loading MMX registers
264 in SImode and DImode */
265 {2, 2}, /* cost of storing MMX registers
266 in SImode and DImode */
267 2, /* cost of moving SSE register */
268 {2, 2, 8}, /* cost of loading SSE registers
269 in SImode, DImode and TImode */
270 {2, 2, 8}, /* cost of storing SSE registers
271 in SImode, DImode and TImode */
272 3, /* MMX or SSE register to integer */
273 32, /* size of prefetch block */
274 6, /* number of parallel prefetches */
276 3, /* cost of FADD and FSUB insns. */
277 5, /* cost of FMUL instruction. */
278 56, /* cost of FDIV instruction. */
279 2, /* cost of FABS instruction. */
280 2, /* cost of FCHS instruction. */
281 56, /* cost of FSQRT instruction. */
285 struct processor_costs k6_cost = {
286 1, /* cost of an add instruction */
287 2, /* cost of a lea instruction */
288 1, /* variable shift costs */
289 1, /* constant shift costs */
290 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
291 0, /* cost of multiply per each bit set */
292 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
293 2, /* cost of movsx */
294 2, /* cost of movzx */
295 8, /* "large" insn */
297 3, /* cost for loading QImode using movzbl */
298 {4, 5, 4}, /* cost of loading integer registers
299 in QImode, HImode and SImode.
300 Relative to reg-reg move (2). */
301 {2, 3, 2}, /* cost of storing integer registers */
302 4, /* cost of reg,reg fld/fst */
303 {6, 6, 6}, /* cost of loading fp registers
304 in SFmode, DFmode and XFmode */
305 {4, 4, 4}, /* cost of loading integer registers */
306 2, /* cost of moving MMX register */
307 {2, 2}, /* cost of loading MMX registers
308 in SImode and DImode */
309 {2, 2}, /* cost of storing MMX registers
310 in SImode and DImode */
311 2, /* cost of moving SSE register */
312 {2, 2, 8}, /* cost of loading SSE registers
313 in SImode, DImode and TImode */
314 {2, 2, 8}, /* cost of storing SSE registers
315 in SImode, DImode and TImode */
316 6, /* MMX or SSE register to integer */
317 32, /* size of prefetch block */
318 1, /* number of parallel prefetches */
320 2, /* cost of FADD and FSUB insns. */
321 2, /* cost of FMUL instruction. */
322 56, /* cost of FDIV instruction. */
323 2, /* cost of FABS instruction. */
324 2, /* cost of FCHS instruction. */
325 56, /* cost of FSQRT instruction. */
329 struct processor_costs athlon_cost = {
330 1, /* cost of an add instruction */
331 2, /* cost of a lea instruction */
332 1, /* variable shift costs */
333 1, /* constant shift costs */
334 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
335 0, /* cost of multiply per each bit set */
336 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
337 1, /* cost of movsx */
338 1, /* cost of movzx */
339 8, /* "large" insn */
341 4, /* cost for loading QImode using movzbl */
342 {3, 4, 3}, /* cost of loading integer registers
343 in QImode, HImode and SImode.
344 Relative to reg-reg move (2). */
345 {3, 4, 3}, /* cost of storing integer registers */
346 4, /* cost of reg,reg fld/fst */
347 {4, 4, 12}, /* cost of loading fp registers
348 in SFmode, DFmode and XFmode */
349 {6, 6, 8}, /* cost of loading integer registers */
350 2, /* cost of moving MMX register */
351 {4, 4}, /* cost of loading MMX registers
352 in SImode and DImode */
353 {4, 4}, /* cost of storing MMX registers
354 in SImode and DImode */
355 2, /* cost of moving SSE register */
356 {4, 4, 6}, /* cost of loading SSE registers
357 in SImode, DImode and TImode */
358 {4, 4, 5}, /* cost of storing SSE registers
359 in SImode, DImode and TImode */
360 5, /* MMX or SSE register to integer */
361 64, /* size of prefetch block */
362 6, /* number of parallel prefetches */
364 4, /* cost of FADD and FSUB insns. */
365 4, /* cost of FMUL instruction. */
366 24, /* cost of FDIV instruction. */
367 2, /* cost of FABS instruction. */
368 2, /* cost of FCHS instruction. */
369 35, /* cost of FSQRT instruction. */
373 struct processor_costs k8_cost = {
374 1, /* cost of an add instruction */
375 2, /* cost of a lea instruction */
376 1, /* variable shift costs */
377 1, /* constant shift costs */
378 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
379 0, /* cost of multiply per each bit set */
380 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
381 1, /* cost of movsx */
382 1, /* cost of movzx */
383 8, /* "large" insn */
385 4, /* cost for loading QImode using movzbl */
386 {3, 4, 3}, /* cost of loading integer registers
387 in QImode, HImode and SImode.
388 Relative to reg-reg move (2). */
389 {3, 4, 3}, /* cost of storing integer registers */
390 4, /* cost of reg,reg fld/fst */
391 {4, 4, 12}, /* cost of loading fp registers
392 in SFmode, DFmode and XFmode */
393 {6, 6, 8}, /* cost of loading integer registers */
394 2, /* cost of moving MMX register */
395 {3, 3}, /* cost of loading MMX registers
396 in SImode and DImode */
397 {4, 4}, /* cost of storing MMX registers
398 in SImode and DImode */
399 2, /* cost of moving SSE register */
400 {4, 3, 6}, /* cost of loading SSE registers
401 in SImode, DImode and TImode */
402 {4, 4, 5}, /* cost of storing SSE registers
403 in SImode, DImode and TImode */
404 5, /* MMX or SSE register to integer */
405 64, /* size of prefetch block */
406 6, /* number of parallel prefetches */
408 4, /* cost of FADD and FSUB insns. */
409 4, /* cost of FMUL instruction. */
410 19, /* cost of FDIV instruction. */
411 2, /* cost of FABS instruction. */
412 2, /* cost of FCHS instruction. */
413 35, /* cost of FSQRT instruction. */
417 struct processor_costs pentium4_cost = {
418 1, /* cost of an add instruction */
419 1, /* cost of a lea instruction */
420 4, /* variable shift costs */
421 4, /* constant shift costs */
422 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
423 0, /* cost of multiply per each bit set */
424 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
425 1, /* cost of movsx */
426 1, /* cost of movzx */
427 16, /* "large" insn */
429 2, /* cost for loading QImode using movzbl */
430 {4, 5, 4}, /* cost of loading integer registers
431 in QImode, HImode and SImode.
432 Relative to reg-reg move (2). */
433 {2, 3, 2}, /* cost of storing integer registers */
434 2, /* cost of reg,reg fld/fst */
435 {2, 2, 6}, /* cost of loading fp registers
436 in SFmode, DFmode and XFmode */
437 {4, 4, 6}, /* cost of loading integer registers */
438 2, /* cost of moving MMX register */
439 {2, 2}, /* cost of loading MMX registers
440 in SImode and DImode */
441 {2, 2}, /* cost of storing MMX registers
442 in SImode and DImode */
443 12, /* cost of moving SSE register */
444 {12, 12, 12}, /* cost of loading SSE registers
445 in SImode, DImode and TImode */
446 {2, 2, 8}, /* cost of storing SSE registers
447 in SImode, DImode and TImode */
448 10, /* MMX or SSE register to integer */
449 64, /* size of prefetch block */
450 6, /* number of parallel prefetches */
452 5, /* cost of FADD and FSUB insns. */
453 7, /* cost of FMUL instruction. */
454 43, /* cost of FDIV instruction. */
455 2, /* cost of FABS instruction. */
456 2, /* cost of FCHS instruction. */
457 43, /* cost of FSQRT instruction. */
460 const struct processor_costs *ix86_cost = &pentium_cost;
462 /* Processor feature/optimization bitmasks. */
463 #define m_386 (1<<PROCESSOR_I386)
464 #define m_486 (1<<PROCESSOR_I486)
465 #define m_PENT (1<<PROCESSOR_PENTIUM)
466 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
467 #define m_K6 (1<<PROCESSOR_K6)
468 #define m_ATHLON (1<<PROCESSOR_ATHLON)
469 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
470 #define m_K8 (1<<PROCESSOR_K8)
471 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
473 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
474 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
475 const int x86_zero_extend_with_and = m_486 | m_PENT;
476 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
477 const int x86_double_with_add = ~m_386;
478 const int x86_use_bit_test = m_386;
479 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
480 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
481 const int x86_3dnow_a = m_ATHLON_K8;
482 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
483 const int x86_branch_hints = m_PENT4;
484 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
485 const int x86_partial_reg_stall = m_PPRO;
486 const int x86_use_loop = m_K6;
487 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
488 const int x86_use_mov0 = m_K6;
489 const int x86_use_cltd = ~(m_PENT | m_K6);
490 const int x86_read_modify_write = ~m_PENT;
491 const int x86_read_modify = ~(m_PENT | m_PPRO);
492 const int x86_split_long_moves = m_PPRO;
493 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
494 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
495 const int x86_single_stringop = m_386 | m_PENT4;
496 const int x86_qimode_math = ~(0);
497 const int x86_promote_qi_regs = 0;
498 const int x86_himode_math = ~(m_PPRO);
499 const int x86_promote_hi_regs = m_PPRO;
500 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
501 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
502 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
503 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
504 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
505 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
506 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
507 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
508 const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
509 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
510 const int x86_decompose_lea = m_PENT4;
511 const int x86_shift1 = ~m_486;
512 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
513 const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
514 /* Set for machines where the type and dependencies are resolved on SSE register
515 parts instead of whole registers, so we may maintain just lower part of
516 scalar values in proper format leaving the upper part undefined. */
517 const int x86_sse_partial_regs = m_ATHLON_K8;
518 /* Athlon optimizes partial-register FPS special case, thus avoiding the
519 need for extra instructions beforehand */
520 const int x86_sse_partial_regs_for_cvtsd2ss = 0;
521 const int x86_sse_typeless_stores = m_ATHLON_K8;
522 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
523 const int x86_use_ffreep = m_ATHLON_K8;
524 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
525 const int x86_inter_unit_moves = ~(m_ATHLON_K8);
526 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_PPRO;
528 /* In case the average insn count for single function invocation is
529 lower than this constant, emit fast (but longer) prologue and
531 #define FAST_PROLOGUE_INSN_COUNT 20
533 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
534 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
535 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
536 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
538 /* Array of the smallest class containing reg number REGNO, indexed by
539 REGNO. Used by REGNO_REG_CLASS in i386.h. */
541 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
544 AREG, DREG, CREG, BREG,
546 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
548 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
549 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
552 /* flags, fpsr, dirflag, frame */
553 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
554 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
556 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
558 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
559 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
560 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
564 /* The "default" register map used in 32bit mode. */
566 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
568 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
569 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
570 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
571 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
572 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
573 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
574 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
577 static int const x86_64_int_parameter_registers[6] =
579 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
580 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
583 static int const x86_64_int_return_registers[4] =
585 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
588 /* The "default" register map used in 64bit mode. */
589 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
591 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
592 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
593 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
594 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
595 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
596 8,9,10,11,12,13,14,15, /* extended integer registers */
597 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
600 /* Define the register numbers to be used in Dwarf debugging information.
601 The SVR4 reference port C compiler uses the following register numbers
602 in its Dwarf output code:
603 0 for %eax (gcc regno = 0)
604 1 for %ecx (gcc regno = 2)
605 2 for %edx (gcc regno = 1)
606 3 for %ebx (gcc regno = 3)
607 4 for %esp (gcc regno = 7)
608 5 for %ebp (gcc regno = 6)
609 6 for %esi (gcc regno = 4)
610 7 for %edi (gcc regno = 5)
611 The following three DWARF register numbers are never generated by
612 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
613 believes these numbers have these meanings.
614 8 for %eip (no gcc equivalent)
615 9 for %eflags (gcc regno = 17)
616 10 for %trapno (no gcc equivalent)
617 It is not at all clear how we should number the FP stack registers
618 for the x86 architecture. If the version of SDB on x86/svr4 were
619 a bit less brain dead with respect to floating-point then we would
620 have a precedent to follow with respect to DWARF register numbers
621 for x86 FP registers, but the SDB on x86/svr4 is so completely
622 broken with respect to FP registers that it is hardly worth thinking
623 of it as something to strive for compatibility with.
624 The version of x86/svr4 SDB I have at the moment does (partially)
625 seem to believe that DWARF register number 11 is associated with
626 the x86 register %st(0), but that's about all. Higher DWARF
627 register numbers don't seem to be associated with anything in
628 particular, and even for DWARF regno 11, SDB only seems to under-
629 stand that it should say that a variable lives in %st(0) (when
630 asked via an `=' command) if we said it was in DWARF regno 11,
631 but SDB still prints garbage when asked for the value of the
632 variable in question (via a `/' command).
633 (Also note that the labels SDB prints for various FP stack regs
634 when doing an `x' command are all wrong.)
635 Note that these problems generally don't affect the native SVR4
636 C compiler because it doesn't allow the use of -O with -g and
637 because when it is *not* optimizing, it allocates a memory
638 location for each floating-point variable, and the memory
639 location is what gets described in the DWARF AT_location
640 attribute for the variable in question.
641 Regardless of the severe mental illness of the x86/svr4 SDB, we
642 do something sensible here and we use the following DWARF
643 register numbers. Note that these are all stack-top-relative
645 11 for %st(0) (gcc regno = 8)
646 12 for %st(1) (gcc regno = 9)
647 13 for %st(2) (gcc regno = 10)
648 14 for %st(3) (gcc regno = 11)
649 15 for %st(4) (gcc regno = 12)
650 16 for %st(5) (gcc regno = 13)
651 17 for %st(6) (gcc regno = 14)
652 18 for %st(7) (gcc regno = 15)
654 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
656 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
657 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
658 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
659 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
660 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
661 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
662 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
665 /* Test and compare insns in i386.md store the information needed to
666 generate branch and scc insns here. */
668 rtx ix86_compare_op0 = NULL_RTX;
669 rtx ix86_compare_op1 = NULL_RTX;
671 #define MAX_386_STACK_LOCALS 3
672 /* Size of the register save area. */
673 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
675 /* Define the structure for the machine field in struct function. */
677 struct stack_local_entry GTY(())
682 struct stack_local_entry *next;
685 /* Structure describing stack frame layout.
686 Stack grows downward:
692 saved frame pointer if frame_pointer_needed
693 <- HARD_FRAME_POINTER
699 > to_allocate <- FRAME_POINTER
711 int outgoing_arguments_size;
714 HOST_WIDE_INT to_allocate;
715 /* The offsets relative to ARG_POINTER. */
716 HOST_WIDE_INT frame_pointer_offset;
717 HOST_WIDE_INT hard_frame_pointer_offset;
718 HOST_WIDE_INT stack_pointer_offset;
720 /* When save_regs_using_mov is set, emit prologue using
721 move instead of push instructions. */
722 bool save_regs_using_mov;
725 /* Used to enable/disable debugging features. */
726 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
727 /* Code model option as passed by user. */
728 const char *ix86_cmodel_string;
730 enum cmodel ix86_cmodel;
732 const char *ix86_asm_string;
733 enum asm_dialect ix86_asm_dialect = ASM_ATT;
735 const char *ix86_tls_dialect_string;
736 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
738 /* Which unit we are generating floating point math for. */
739 enum fpmath_unit ix86_fpmath;
741 /* Which cpu are we scheduling for. */
742 enum processor_type ix86_tune;
743 /* Which instruction set architecture to use. */
744 enum processor_type ix86_arch;
746 /* Strings to hold which cpu and instruction set architecture to use. */
747 const char *ix86_tune_string; /* for -mtune=<xxx> */
748 const char *ix86_arch_string; /* for -march=<xxx> */
749 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
751 /* # of registers to use to pass arguments. */
752 const char *ix86_regparm_string;
754 /* true if sse prefetch instruction is not NOOP. */
755 int x86_prefetch_sse;
757 /* ix86_regparm_string as a number */
760 /* Alignment to use for loops and jumps: */
762 /* Power of two alignment for loops. */
763 const char *ix86_align_loops_string;
765 /* Power of two alignment for non-loop jumps. */
766 const char *ix86_align_jumps_string;
768 /* Power of two alignment for stack boundary in bytes. */
769 const char *ix86_preferred_stack_boundary_string;
771 /* Preferred alignment for stack boundary in bits. */
772 int ix86_preferred_stack_boundary;
774 /* Values 1-5: see jump.c */
775 int ix86_branch_cost;
776 const char *ix86_branch_cost_string;
778 /* Power of two alignment for functions. */
779 const char *ix86_align_funcs_string;
781 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
782 static char internal_label_prefix[16];
783 static int internal_label_prefix_len;
785 static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
786 static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
787 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
788 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
790 static const char *get_some_local_dynamic_name PARAMS ((void));
791 static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
792 static rtx maybe_get_pool_constant PARAMS ((rtx));
793 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
794 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
796 static rtx get_thread_pointer PARAMS ((int));
797 static rtx legitimize_tls_address PARAMS ((rtx, enum tls_model, int));
798 static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
799 static rtx gen_push PARAMS ((rtx));
800 static int memory_address_length PARAMS ((rtx addr));
801 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
802 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
803 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
804 static void ix86_dump_ppro_packet PARAMS ((FILE *));
805 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
806 static struct machine_function * ix86_init_machine_status PARAMS ((void));
807 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
808 static int ix86_nsaved_regs PARAMS ((void));
809 static void ix86_emit_save_regs PARAMS ((void));
810 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
811 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
812 static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
813 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
814 static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
815 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
816 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
817 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
818 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
819 static int ix86_issue_rate PARAMS ((void));
820 static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
821 static void ix86_sched_init PARAMS ((FILE *, int, int));
822 static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
823 static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
824 static int ia32_use_dfa_pipeline_interface PARAMS ((void));
825 static int ia32_multipass_dfa_lookahead PARAMS ((void));
826 static void ix86_init_mmx_sse_builtins PARAMS ((void));
827 static rtx x86_this_parameter PARAMS ((tree));
828 static void x86_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
829 HOST_WIDE_INT, tree));
830 static bool x86_can_output_mi_thunk PARAMS ((tree, HOST_WIDE_INT,
831 HOST_WIDE_INT, tree));
832 static void x86_file_start PARAMS ((void));
833 static void ix86_reorg PARAMS ((void));
834 bool ix86_expand_carry_flag_compare PARAMS ((enum rtx_code, rtx, rtx, rtx*));
838 rtx base, index, disp;
840 enum ix86_address_seg { SEG_DEFAULT, SEG_FS, SEG_GS } seg;
843 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
844 static int ix86_address_cost PARAMS ((rtx));
845 static bool ix86_cannot_force_const_mem PARAMS ((rtx));
846 static rtx ix86_delegitimize_address PARAMS ((rtx));
848 struct builtin_description;
849 static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
851 static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
853 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
854 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
855 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
856 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
857 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
858 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
859 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
863 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
865 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
866 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
867 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
868 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
869 static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
870 static int ix86_save_reg PARAMS ((unsigned int, int));
871 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
872 static int ix86_comp_type_attributes PARAMS ((tree, tree));
873 static int ix86_fntype_regparm PARAMS ((tree));
874 const struct attribute_spec ix86_attribute_table[];
875 static bool ix86_function_ok_for_sibcall PARAMS ((tree, tree));
876 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
877 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
878 static int ix86_value_regno PARAMS ((enum machine_mode));
879 static bool contains_128bit_aligned_vector_p PARAMS ((tree));
880 static bool ix86_ms_bitfield_layout_p PARAMS ((tree));
881 static tree ix86_handle_struct_attribute PARAMS ((tree *, tree, tree, int, bool *));
882 static int extended_reg_mentioned_1 PARAMS ((rtx *, void *));
883 static bool ix86_rtx_costs PARAMS ((rtx, int, int, int *));
884 static int min_insn_size PARAMS ((rtx));
885 static void k8_avoid_jump_misspredicts PARAMS ((void));
887 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
888 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
891 /* Register class used for passing given 64bit part of the argument.
892 These represent classes as documented by the PS ABI, with the exception
893 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
894 use SF or DFmode move instead of DImode to avoid reformatting penalties.
896 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
897 whenever possible (upper half does contain padding).
899 enum x86_64_reg_class
902 X86_64_INTEGER_CLASS,
903 X86_64_INTEGERSI_CLASS,
912 static const char * const x86_64_reg_class_name[] =
913 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
915 #define MAX_CLASSES 4
916 static int classify_argument PARAMS ((enum machine_mode, tree,
917 enum x86_64_reg_class [MAX_CLASSES],
919 static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
921 static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
923 static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
924 enum x86_64_reg_class));
926 /* Table of constants used by fldpi, fldln2, etc... */
927 static REAL_VALUE_TYPE ext_80387_constants_table [5];
928 static bool ext_80387_constants_init = 0;
929 static void init_ext_80387_constants PARAMS ((void));
931 /* Initialize the GCC target structure. */
932 #undef TARGET_ATTRIBUTE_TABLE
933 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
934 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
935 # undef TARGET_MERGE_DECL_ATTRIBUTES
936 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
939 #undef TARGET_COMP_TYPE_ATTRIBUTES
940 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
942 #undef TARGET_INIT_BUILTINS
943 #define TARGET_INIT_BUILTINS ix86_init_builtins
945 #undef TARGET_EXPAND_BUILTIN
946 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
948 #undef TARGET_ASM_FUNCTION_EPILOGUE
949 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
951 #undef TARGET_ASM_OPEN_PAREN
952 #define TARGET_ASM_OPEN_PAREN ""
953 #undef TARGET_ASM_CLOSE_PAREN
954 #define TARGET_ASM_CLOSE_PAREN ""
956 #undef TARGET_ASM_ALIGNED_HI_OP
957 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
958 #undef TARGET_ASM_ALIGNED_SI_OP
959 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
961 #undef TARGET_ASM_ALIGNED_DI_OP
962 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
965 #undef TARGET_ASM_UNALIGNED_HI_OP
966 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
967 #undef TARGET_ASM_UNALIGNED_SI_OP
968 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
969 #undef TARGET_ASM_UNALIGNED_DI_OP
970 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
972 #undef TARGET_SCHED_ADJUST_COST
973 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
974 #undef TARGET_SCHED_ISSUE_RATE
975 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
976 #undef TARGET_SCHED_VARIABLE_ISSUE
977 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
978 #undef TARGET_SCHED_INIT
979 #define TARGET_SCHED_INIT ix86_sched_init
980 #undef TARGET_SCHED_REORDER
981 #define TARGET_SCHED_REORDER ix86_sched_reorder
982 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
983 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
984 ia32_use_dfa_pipeline_interface
985 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
986 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
987 ia32_multipass_dfa_lookahead
989 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
990 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
993 #undef TARGET_HAVE_TLS
994 #define TARGET_HAVE_TLS true
996 #undef TARGET_CANNOT_FORCE_CONST_MEM
997 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
999 #undef TARGET_DELEGITIMIZE_ADDRESS
1000 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1002 #undef TARGET_MS_BITFIELD_LAYOUT_P
1003 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1005 #undef TARGET_ASM_OUTPUT_MI_THUNK
1006 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1007 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1008 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1010 #undef TARGET_ASM_FILE_START
1011 #define TARGET_ASM_FILE_START x86_file_start
1013 #undef TARGET_RTX_COSTS
1014 #define TARGET_RTX_COSTS ix86_rtx_costs
1015 #undef TARGET_ADDRESS_COST
1016 #define TARGET_ADDRESS_COST ix86_address_cost
1018 #undef TARGET_MACHINE_DEPENDENT_REORG
1019 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1021 struct gcc_target targetm = TARGET_INITIALIZER;
1023 /* The svr4 ABI for the i386 says that records and unions are returned
1025 #ifndef DEFAULT_PCC_STRUCT_RETURN
1026 #define DEFAULT_PCC_STRUCT_RETURN 1
1029 /* Sometimes certain combinations of command options do not make
1030 sense on a particular target machine. You can define a macro
1031 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1032 defined, is executed once just after all the command options have
1035 Don't use this macro to turn on various extra optimizations for
1036 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1042 /* Comes from final.c -- no real reason to change it. */
1043 #define MAX_CODE_ALIGN 16
1047 const struct processor_costs *cost; /* Processor costs */
1048 const int target_enable; /* Target flags to enable. */
1049 const int target_disable; /* Target flags to disable. */
1050 const int align_loop; /* Default alignments. */
1051 const int align_loop_max_skip;
1052 const int align_jump;
1053 const int align_jump_max_skip;
1054 const int align_func;
1056 const processor_target_table[PROCESSOR_max] =
1058 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1059 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1060 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1061 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1062 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1063 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1064 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1065 {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
1068 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1071 const char *const name; /* processor name or nickname. */
1072 const enum processor_type processor;
1073 const enum pta_flags
1078 PTA_PREFETCH_SSE = 8,
1084 const processor_alias_table[] =
1086 {"i386", PROCESSOR_I386, 0},
1087 {"i486", PROCESSOR_I486, 0},
1088 {"i586", PROCESSOR_PENTIUM, 0},
1089 {"pentium", PROCESSOR_PENTIUM, 0},
1090 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1091 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1092 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1093 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1094 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1095 {"i686", PROCESSOR_PENTIUMPRO, 0},
1096 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1097 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1098 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1099 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
1100 PTA_MMX | PTA_PREFETCH_SSE},
1101 {"k6", PROCESSOR_K6, PTA_MMX},
1102 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1103 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1104 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1106 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1107 | PTA_3DNOW | PTA_3DNOW_A},
1108 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1109 | PTA_3DNOW_A | PTA_SSE},
1110 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1111 | PTA_3DNOW_A | PTA_SSE},
1112 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1113 | PTA_3DNOW_A | PTA_SSE},
1114 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1115 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1118 int const pta_size = ARRAY_SIZE (processor_alias_table);
1120 /* By default our XFmode is the 80-bit extended format. If we have
1121 use TFmode instead, it's also the 80-bit format, but with padding. */
1122 real_format_for_mode[XFmode - QFmode] = &ieee_extended_intel_96_format;
1123 real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
1125 /* Set the default values for switches whose default depends on TARGET_64BIT
1126 in case they weren't overwritten by command line options. */
1129 if (flag_omit_frame_pointer == 2)
1130 flag_omit_frame_pointer = 1;
1131 if (flag_asynchronous_unwind_tables == 2)
1132 flag_asynchronous_unwind_tables = 1;
1133 if (flag_pcc_struct_return == 2)
1134 flag_pcc_struct_return = 0;
1138 if (flag_omit_frame_pointer == 2)
1139 flag_omit_frame_pointer = 0;
1140 if (flag_asynchronous_unwind_tables == 2)
1141 flag_asynchronous_unwind_tables = 0;
1142 if (flag_pcc_struct_return == 2)
1143 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1146 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1147 SUBTARGET_OVERRIDE_OPTIONS;
1150 if (!ix86_tune_string && ix86_arch_string)
1151 ix86_tune_string = ix86_arch_string;
1152 if (!ix86_tune_string)
1153 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1154 if (!ix86_arch_string)
1155 ix86_arch_string = TARGET_64BIT ? "k8" : "i386";
1157 if (ix86_cmodel_string != 0)
1159 if (!strcmp (ix86_cmodel_string, "small"))
1160 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1162 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1163 else if (!strcmp (ix86_cmodel_string, "32"))
1164 ix86_cmodel = CM_32;
1165 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1166 ix86_cmodel = CM_KERNEL;
1167 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1168 ix86_cmodel = CM_MEDIUM;
1169 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1170 ix86_cmodel = CM_LARGE;
1172 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1176 ix86_cmodel = CM_32;
1178 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1180 if (ix86_asm_string != 0)
1182 if (!strcmp (ix86_asm_string, "intel"))
1183 ix86_asm_dialect = ASM_INTEL;
1184 else if (!strcmp (ix86_asm_string, "att"))
1185 ix86_asm_dialect = ASM_ATT;
1187 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1189 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1190 error ("code model `%s' not supported in the %s bit mode",
1191 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1192 if (ix86_cmodel == CM_LARGE)
1193 sorry ("code model `large' not supported yet");
1194 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1195 sorry ("%i-bit mode not compiled in",
1196 (target_flags & MASK_64BIT) ? 64 : 32);
1198 for (i = 0; i < pta_size; i++)
1199 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1201 ix86_arch = processor_alias_table[i].processor;
1202 /* Default cpu tuning to the architecture. */
1203 ix86_tune = ix86_arch;
1204 if (processor_alias_table[i].flags & PTA_MMX
1205 && !(target_flags_explicit & MASK_MMX))
1206 target_flags |= MASK_MMX;
1207 if (processor_alias_table[i].flags & PTA_3DNOW
1208 && !(target_flags_explicit & MASK_3DNOW))
1209 target_flags |= MASK_3DNOW;
1210 if (processor_alias_table[i].flags & PTA_3DNOW_A
1211 && !(target_flags_explicit & MASK_3DNOW_A))
1212 target_flags |= MASK_3DNOW_A;
1213 if (processor_alias_table[i].flags & PTA_SSE
1214 && !(target_flags_explicit & MASK_SSE))
1215 target_flags |= MASK_SSE;
1216 if (processor_alias_table[i].flags & PTA_SSE2
1217 && !(target_flags_explicit & MASK_SSE2))
1218 target_flags |= MASK_SSE2;
1219 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1220 x86_prefetch_sse = true;
1221 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1222 error ("CPU you selected does not support x86-64 instruction set");
1227 error ("bad value (%s) for -march= switch", ix86_arch_string);
1229 for (i = 0; i < pta_size; i++)
1230 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1232 ix86_tune = processor_alias_table[i].processor;
1233 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1234 error ("CPU you selected does not support x86-64 instruction set");
1237 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1238 x86_prefetch_sse = true;
1240 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1243 ix86_cost = &size_cost;
1245 ix86_cost = processor_target_table[ix86_tune].cost;
1246 target_flags |= processor_target_table[ix86_tune].target_enable;
1247 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1249 /* Arrange to set up i386_stack_locals for all functions. */
1250 init_machine_status = ix86_init_machine_status;
1252 /* Validate -mregparm= value. */
1253 if (ix86_regparm_string)
1255 i = atoi (ix86_regparm_string);
1256 if (i < 0 || i > REGPARM_MAX)
1257 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1263 ix86_regparm = REGPARM_MAX;
1265 /* If the user has provided any of the -malign-* options,
1266 warn and use that value only if -falign-* is not set.
1267 Remove this code in GCC 3.2 or later. */
1268 if (ix86_align_loops_string)
1270 warning ("-malign-loops is obsolete, use -falign-loops");
1271 if (align_loops == 0)
1273 i = atoi (ix86_align_loops_string);
1274 if (i < 0 || i > MAX_CODE_ALIGN)
1275 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1277 align_loops = 1 << i;
1281 if (ix86_align_jumps_string)
1283 warning ("-malign-jumps is obsolete, use -falign-jumps");
1284 if (align_jumps == 0)
1286 i = atoi (ix86_align_jumps_string);
1287 if (i < 0 || i > MAX_CODE_ALIGN)
1288 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1290 align_jumps = 1 << i;
1294 if (ix86_align_funcs_string)
1296 warning ("-malign-functions is obsolete, use -falign-functions");
1297 if (align_functions == 0)
1299 i = atoi (ix86_align_funcs_string);
1300 if (i < 0 || i > MAX_CODE_ALIGN)
1301 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1303 align_functions = 1 << i;
1307 /* Default align_* from the processor table. */
1308 if (align_loops == 0)
1310 align_loops = processor_target_table[ix86_tune].align_loop;
1311 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1313 if (align_jumps == 0)
1315 align_jumps = processor_target_table[ix86_tune].align_jump;
1316 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1318 if (align_functions == 0)
1320 align_functions = processor_target_table[ix86_tune].align_func;
1323 /* Validate -mpreferred-stack-boundary= value, or provide default.
1324 The default of 128 bits is for Pentium III's SSE __m128, but we
1325 don't want additional code to keep the stack aligned when
1326 optimizing for code size. */
1327 ix86_preferred_stack_boundary = (optimize_size
1328 ? TARGET_64BIT ? 128 : 32
1330 if (ix86_preferred_stack_boundary_string)
1332 i = atoi (ix86_preferred_stack_boundary_string);
1333 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1334 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1335 TARGET_64BIT ? 4 : 2);
1337 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1340 /* Validate -mbranch-cost= value, or provide default. */
1341 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1342 if (ix86_branch_cost_string)
1344 i = atoi (ix86_branch_cost_string);
1346 error ("-mbranch-cost=%d is not between 0 and 5", i);
1348 ix86_branch_cost = i;
1351 if (ix86_tls_dialect_string)
1353 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1354 ix86_tls_dialect = TLS_DIALECT_GNU;
1355 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1356 ix86_tls_dialect = TLS_DIALECT_SUN;
1358 error ("bad value (%s) for -mtls-dialect= switch",
1359 ix86_tls_dialect_string);
1362 /* Keep nonleaf frame pointers. */
1363 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1364 flag_omit_frame_pointer = 1;
1366 /* If we're doing fast math, we don't care about comparison order
1367 wrt NaNs. This lets us use a shorter comparison sequence. */
1368 if (flag_unsafe_math_optimizations)
1369 target_flags &= ~MASK_IEEE_FP;
1371 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1372 since the insns won't need emulation. */
1373 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1374 target_flags &= ~MASK_NO_FANCY_MATH_387;
1376 /* Turn on SSE2 builtins for -mpni. */
1378 target_flags |= MASK_SSE2;
1380 /* Turn on SSE builtins for -msse2. */
1382 target_flags |= MASK_SSE;
1386 if (TARGET_ALIGN_DOUBLE)
1387 error ("-malign-double makes no sense in the 64bit mode");
1389 error ("-mrtd calling convention not supported in the 64bit mode");
1390 /* Enable by default the SSE and MMX builtins. */
1391 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1392 ix86_fpmath = FPMATH_SSE;
1396 ix86_fpmath = FPMATH_387;
1397 /* i386 ABI does not specify red zone. It still makes sense to use it
1398 when programmer takes care to stack from being destroyed. */
1399 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1400 target_flags |= MASK_NO_RED_ZONE;
1403 if (ix86_fpmath_string != 0)
1405 if (! strcmp (ix86_fpmath_string, "387"))
1406 ix86_fpmath = FPMATH_387;
1407 else if (! strcmp (ix86_fpmath_string, "sse"))
1411 warning ("SSE instruction set disabled, using 387 arithmetics");
1412 ix86_fpmath = FPMATH_387;
1415 ix86_fpmath = FPMATH_SSE;
1417 else if (! strcmp (ix86_fpmath_string, "387,sse")
1418 || ! strcmp (ix86_fpmath_string, "sse,387"))
1422 warning ("SSE instruction set disabled, using 387 arithmetics");
1423 ix86_fpmath = FPMATH_387;
1425 else if (!TARGET_80387)
1427 warning ("387 instruction set disabled, using SSE arithmetics");
1428 ix86_fpmath = FPMATH_SSE;
1431 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1434 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1437 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1441 target_flags |= MASK_MMX;
1442 x86_prefetch_sse = true;
1445 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1448 target_flags |= MASK_MMX;
1449 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1450 extensions it adds. */
1451 if (x86_3dnow_a & (1 << ix86_arch))
1452 target_flags |= MASK_3DNOW_A;
1454 if ((x86_accumulate_outgoing_args & TUNEMASK)
1455 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1457 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1459 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1462 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1463 p = strchr (internal_label_prefix, 'X');
1464 internal_label_prefix_len = p - internal_label_prefix;
1470 optimization_options (level, size)
1472 int size ATTRIBUTE_UNUSED;
1474 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1475 make the problem with not enough registers even worse. */
1476 #ifdef INSN_SCHEDULING
1478 flag_schedule_insns = 0;
1481 /* The default values of these switches depend on the TARGET_64BIT
1482 that is not known at this moment. Mark these values with 2 and
1483 let user the to override these. In case there is no command line option
1484 specifying them, we will set the defaults in override_options. */
1486 flag_omit_frame_pointer = 2;
1487 flag_pcc_struct_return = 2;
1488 flag_asynchronous_unwind_tables = 2;
1491 /* Table of valid machine attributes. */
1492 const struct attribute_spec ix86_attribute_table[] =
1494 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1495 /* Stdcall attribute says callee is responsible for popping arguments
1496 if they are not variable. */
1497 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1498 /* Fastcall attribute says callee is responsible for popping arguments
1499 if they are not variable. */
1500 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1501 /* Cdecl attribute says the callee is a normal C declaration */
1502 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1503 /* Regparm attribute specifies how many integer arguments are to be
1504 passed in registers. */
1505 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1506 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1507 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1508 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1509 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1511 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1512 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1513 { NULL, 0, 0, false, false, false, NULL }
1516 /* Decide whether we can make a sibling call to a function. DECL is the
1517 declaration of the function being targeted by the call and EXP is the
1518 CALL_EXPR representing the call. */
1521 ix86_function_ok_for_sibcall (decl, exp)
1525 /* If we are generating position-independent code, we cannot sibcall
1526 optimize any indirect call, or a direct call to a global function,
1527 as the PLT requires %ebx be live. */
1528 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1531 /* If we are returning floats on the 80387 register stack, we cannot
1532 make a sibcall from a function that doesn't return a float to a
1533 function that does or, conversely, from a function that does return
1534 a float to a function that doesn't; the necessary stack adjustment
1535 would not be executed. */
1536 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1537 != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1540 /* If this call is indirect, we'll need to be able to use a call-clobbered
1541 register for the address of the target function. Make sure that all
1542 such registers are not used for passing parameters. */
1543 if (!decl && !TARGET_64BIT)
1545 int regparm = ix86_regparm;
1548 /* We're looking at the CALL_EXPR, we need the type of the function. */
1549 type = TREE_OPERAND (exp, 0); /* pointer expression */
1550 type = TREE_TYPE (type); /* pointer type */
1551 type = TREE_TYPE (type); /* function type */
1553 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1555 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1559 /* ??? Need to count the actual number of registers to be used,
1560 not the possible number of registers. Fix later. */
1565 /* Otherwise okay. That also includes certain types of indirect calls. */
1569 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1570 arguments as in struct attribute_spec.handler. */
1572 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1575 tree args ATTRIBUTE_UNUSED;
1576 int flags ATTRIBUTE_UNUSED;
1579 if (TREE_CODE (*node) != FUNCTION_TYPE
1580 && TREE_CODE (*node) != METHOD_TYPE
1581 && TREE_CODE (*node) != FIELD_DECL
1582 && TREE_CODE (*node) != TYPE_DECL)
1584 warning ("`%s' attribute only applies to functions",
1585 IDENTIFIER_POINTER (name));
1586 *no_add_attrs = true;
1590 if (is_attribute_p ("fastcall", name))
1592 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1594 error ("fastcall and stdcall attributes are not compatible");
1596 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1598 error ("fastcall and regparm attributes are not compatible");
1601 else if (is_attribute_p ("stdcall", name))
1603 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1605 error ("fastcall and stdcall attributes are not compatible");
1612 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1613 *no_add_attrs = true;
1619 /* Handle a "regparm" attribute;
1620 arguments as in struct attribute_spec.handler. */
1622 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1626 int flags ATTRIBUTE_UNUSED;
1629 if (TREE_CODE (*node) != FUNCTION_TYPE
1630 && TREE_CODE (*node) != METHOD_TYPE
1631 && TREE_CODE (*node) != FIELD_DECL
1632 && TREE_CODE (*node) != TYPE_DECL)
1634 warning ("`%s' attribute only applies to functions",
1635 IDENTIFIER_POINTER (name));
1636 *no_add_attrs = true;
1642 cst = TREE_VALUE (args);
1643 if (TREE_CODE (cst) != INTEGER_CST)
1645 warning ("`%s' attribute requires an integer constant argument",
1646 IDENTIFIER_POINTER (name));
1647 *no_add_attrs = true;
1649 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1651 warning ("argument to `%s' attribute larger than %d",
1652 IDENTIFIER_POINTER (name), REGPARM_MAX);
1653 *no_add_attrs = true;
1656 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1658 error ("fastcall and regparm attributes are not compatible");
1665 /* Return 0 if the attributes for two types are incompatible, 1 if they
1666 are compatible, and 2 if they are nearly compatible (which causes a
1667 warning to be generated). */
1670 ix86_comp_type_attributes (type1, type2)
1674 /* Check for mismatch of non-default calling convention. */
1675 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1677 if (TREE_CODE (type1) != FUNCTION_TYPE)
1680 /* Check for mismatched fastcall types */
1681 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1682 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1685 /* Check for mismatched return types (cdecl vs stdcall). */
1686 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1687 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1692 /* Return the regparm value for a fuctio with the indicated TYPE. */
1695 ix86_fntype_regparm (type)
1700 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1702 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1704 return ix86_regparm;
1707 /* Value is the number of bytes of arguments automatically
1708 popped when returning from a subroutine call.
1709 FUNDECL is the declaration node of the function (as a tree),
1710 FUNTYPE is the data type of the function (as a tree),
1711 or for a library call it is an identifier node for the subroutine name.
1712 SIZE is the number of bytes of arguments passed on the stack.
1714 On the 80386, the RTD insn may be used to pop them if the number
1715 of args is fixed, but if the number is variable then the caller
1716 must pop them all. RTD can't be used for library calls now
1717 because the library is compiled with the Unix compiler.
1718 Use of RTD is a selectable option, since it is incompatible with
1719 standard Unix calling sequences. If the option is not selected,
1720 the caller must always pop the args.
1722 The attribute stdcall is equivalent to RTD on a per module basis. */
1725 ix86_return_pops_args (fundecl, funtype, size)
1730 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1732 /* Cdecl functions override -mrtd, and never pop the stack. */
1733 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1735 /* Stdcall and fastcall functions will pop the stack if not variable args. */
1736 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1737 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1741 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1742 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1743 == void_type_node)))
1747 /* Lose any fake structure return argument if it is passed on the stack. */
1748 if (aggregate_value_p (TREE_TYPE (funtype))
1751 int nregs = ix86_fntype_regparm (funtype);
1754 return GET_MODE_SIZE (Pmode);
1760 /* Argument support functions. */
1762 /* Return true when register may be used to pass function parameters. */
1764 ix86_function_arg_regno_p (regno)
1769 return (regno < REGPARM_MAX
1770 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1771 if (SSE_REGNO_P (regno) && TARGET_SSE)
1773 /* RAX is used as hidden argument to va_arg functions. */
1776 for (i = 0; i < REGPARM_MAX; i++)
1777 if (regno == x86_64_int_parameter_registers[i])
1782 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1783 for a call to a function whose data type is FNTYPE.
1784 For a library call, FNTYPE is 0. */
1787 init_cumulative_args (cum, fntype, libname, fndecl)
1788 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1789 tree fntype; /* tree ptr for function decl */
1790 rtx libname; /* SYMBOL_REF of library name or 0 */
1793 static CUMULATIVE_ARGS zero_cum;
1794 tree param, next_param;
1795 bool user_convention = false;
1797 if (TARGET_DEBUG_ARG)
1799 fprintf (stderr, "\ninit_cumulative_args (");
1801 fprintf (stderr, "fntype code = %s, ret code = %s",
1802 tree_code_name[(int) TREE_CODE (fntype)],
1803 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1805 fprintf (stderr, "no fntype");
1808 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1813 /* Set up the number of registers to use for passing arguments. */
1814 cum->nregs = ix86_regparm;
1815 cum->sse_nregs = SSE_REGPARM_MAX;
1816 if (fntype && !TARGET_64BIT)
1818 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1822 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1823 user_convention = true;
1826 cum->maybe_vaarg = false;
1828 /* Use ecx and edx registers if function has fastcall attribute */
1829 if (fntype && !TARGET_64BIT)
1831 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1835 user_convention = true;
1839 /* Use register calling convention for local functions when possible. */
1840 if (!TARGET_64BIT && !user_convention && fndecl
1841 && flag_unit_at_a_time)
1843 struct cgraph_local_info *i = cgraph_local_info (fndecl);
1846 /* We can't use regparm(3) for nested functions as these use
1847 static chain pointer in third argument. */
1848 if (DECL_CONTEXT (fndecl) && !DECL_NO_STATIC_CHAIN (fndecl))
1856 /* Determine if this function has variable arguments. This is
1857 indicated by the last argument being 'void_type_mode' if there
1858 are no variable arguments. If there are variable arguments, then
1859 we won't pass anything in registers */
1863 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1864 param != 0; param = next_param)
1866 next_param = TREE_CHAIN (param);
1867 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1874 cum->maybe_vaarg = true;
1878 if ((!fntype && !libname)
1879 || (fntype && !TYPE_ARG_TYPES (fntype)))
1880 cum->maybe_vaarg = 1;
1882 if (TARGET_DEBUG_ARG)
1883 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1888 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
1889 of this code is to classify each 8bytes of incoming argument by the register
1890 class and assign registers accordingly. */
1892 /* Return the union class of CLASS1 and CLASS2.
1893 See the x86-64 PS ABI for details. */
1895 static enum x86_64_reg_class
1896 merge_classes (class1, class2)
1897 enum x86_64_reg_class class1, class2;
1899 /* Rule #1: If both classes are equal, this is the resulting class. */
1900 if (class1 == class2)
1903 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1905 if (class1 == X86_64_NO_CLASS)
1907 if (class2 == X86_64_NO_CLASS)
1910 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1911 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1912 return X86_64_MEMORY_CLASS;
1914 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1915 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1916 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1917 return X86_64_INTEGERSI_CLASS;
1918 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1919 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1920 return X86_64_INTEGER_CLASS;
1922 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1923 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1924 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1925 return X86_64_MEMORY_CLASS;
1927 /* Rule #6: Otherwise class SSE is used. */
1928 return X86_64_SSE_CLASS;
1931 /* Classify the argument of type TYPE and mode MODE.
1932 CLASSES will be filled by the register class used to pass each word
1933 of the operand. The number of words is returned. In case the parameter
1934 should be passed in memory, 0 is returned. As a special case for zero
1935 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1937 BIT_OFFSET is used internally for handling records and specifies offset
1938 of the offset in bits modulo 256 to avoid overflow cases.
1940 See the x86-64 PS ABI for details.
1944 classify_argument (mode, type, classes, bit_offset)
1945 enum machine_mode mode;
1947 enum x86_64_reg_class classes[MAX_CLASSES];
1951 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1952 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1954 /* Variable sized entities are always passed/returned in memory. */
1958 if (mode != VOIDmode
1959 && MUST_PASS_IN_STACK (mode, type))
1962 if (type && AGGREGATE_TYPE_P (type))
1966 enum x86_64_reg_class subclasses[MAX_CLASSES];
1968 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1972 for (i = 0; i < words; i++)
1973 classes[i] = X86_64_NO_CLASS;
1975 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1976 signalize memory class, so handle it as special case. */
1979 classes[0] = X86_64_NO_CLASS;
1983 /* Classify each field of record and merge classes. */
1984 if (TREE_CODE (type) == RECORD_TYPE)
1986 /* For classes first merge in the field of the subclasses. */
1987 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1989 tree bases = TYPE_BINFO_BASETYPES (type);
1990 int n_bases = TREE_VEC_LENGTH (bases);
1993 for (i = 0; i < n_bases; ++i)
1995 tree binfo = TREE_VEC_ELT (bases, i);
1997 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1998 tree type = BINFO_TYPE (binfo);
2000 num = classify_argument (TYPE_MODE (type),
2002 (offset + bit_offset) % 256);
2005 for (i = 0; i < num; i++)
2007 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2009 merge_classes (subclasses[i], classes[i + pos]);
2013 /* And now merge the fields of structure. */
2014 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2016 if (TREE_CODE (field) == FIELD_DECL)
2020 /* Bitfields are always classified as integer. Handle them
2021 early, since later code would consider them to be
2022 misaligned integers. */
2023 if (DECL_BIT_FIELD (field))
2025 for (i = int_bit_position (field) / 8 / 8;
2026 i < (int_bit_position (field)
2027 + tree_low_cst (DECL_SIZE (field), 0)
2030 merge_classes (X86_64_INTEGER_CLASS,
2035 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2036 TREE_TYPE (field), subclasses,
2037 (int_bit_position (field)
2038 + bit_offset) % 256);
2041 for (i = 0; i < num; i++)
2044 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2046 merge_classes (subclasses[i], classes[i + pos]);
2052 /* Arrays are handled as small records. */
2053 else if (TREE_CODE (type) == ARRAY_TYPE)
2056 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2057 TREE_TYPE (type), subclasses, bit_offset);
2061 /* The partial classes are now full classes. */
2062 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2063 subclasses[0] = X86_64_SSE_CLASS;
2064 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2065 subclasses[0] = X86_64_INTEGER_CLASS;
2067 for (i = 0; i < words; i++)
2068 classes[i] = subclasses[i % num];
2070 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2071 else if (TREE_CODE (type) == UNION_TYPE
2072 || TREE_CODE (type) == QUAL_UNION_TYPE)
2074 /* For classes first merge in the field of the subclasses. */
2075 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2077 tree bases = TYPE_BINFO_BASETYPES (type);
2078 int n_bases = TREE_VEC_LENGTH (bases);
2081 for (i = 0; i < n_bases; ++i)
2083 tree binfo = TREE_VEC_ELT (bases, i);
2085 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2086 tree type = BINFO_TYPE (binfo);
2088 num = classify_argument (TYPE_MODE (type),
2090 (offset + (bit_offset % 64)) % 256);
2093 for (i = 0; i < num; i++)
2095 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2097 merge_classes (subclasses[i], classes[i + pos]);
2101 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2103 if (TREE_CODE (field) == FIELD_DECL)
2106 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2107 TREE_TYPE (field), subclasses,
2111 for (i = 0; i < num; i++)
2112 classes[i] = merge_classes (subclasses[i], classes[i]);
2119 /* Final merger cleanup. */
2120 for (i = 0; i < words; i++)
2122 /* If one class is MEMORY, everything should be passed in
2124 if (classes[i] == X86_64_MEMORY_CLASS)
2127 /* The X86_64_SSEUP_CLASS should be always preceded by
2128 X86_64_SSE_CLASS. */
2129 if (classes[i] == X86_64_SSEUP_CLASS
2130 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2131 classes[i] = X86_64_SSE_CLASS;
2133 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2134 if (classes[i] == X86_64_X87UP_CLASS
2135 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2136 classes[i] = X86_64_SSE_CLASS;
2141 /* Compute alignment needed. We align all types to natural boundaries with
2142 exception of XFmode that is aligned to 64bits. */
2143 if (mode != VOIDmode && mode != BLKmode)
2145 int mode_alignment = GET_MODE_BITSIZE (mode);
2148 mode_alignment = 128;
2149 else if (mode == XCmode)
2150 mode_alignment = 256;
2151 /* Misaligned fields are always returned in memory. */
2152 if (bit_offset % mode_alignment)
2156 /* Classification of atomic types. */
2166 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2167 classes[0] = X86_64_INTEGERSI_CLASS;
2169 classes[0] = X86_64_INTEGER_CLASS;
2173 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2176 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2177 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2180 if (!(bit_offset % 64))
2181 classes[0] = X86_64_SSESF_CLASS;
2183 classes[0] = X86_64_SSE_CLASS;
2186 classes[0] = X86_64_SSEDF_CLASS;
2189 classes[0] = X86_64_X87_CLASS;
2190 classes[1] = X86_64_X87UP_CLASS;
2193 classes[0] = X86_64_X87_CLASS;
2194 classes[1] = X86_64_X87UP_CLASS;
2195 classes[2] = X86_64_X87_CLASS;
2196 classes[3] = X86_64_X87UP_CLASS;
2199 classes[0] = X86_64_SSEDF_CLASS;
2200 classes[1] = X86_64_SSEDF_CLASS;
2203 classes[0] = X86_64_SSE_CLASS;
2211 classes[0] = X86_64_SSE_CLASS;
2212 classes[1] = X86_64_SSEUP_CLASS;
2227 /* Examine the argument and return set number of register required in each
2228 class. Return 0 iff parameter should be passed in memory. */
2230 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
2231 enum machine_mode mode;
2233 int *int_nregs, *sse_nregs;
2236 enum x86_64_reg_class class[MAX_CLASSES];
2237 int n = classify_argument (mode, type, class, 0);
2243 for (n--; n >= 0; n--)
2246 case X86_64_INTEGER_CLASS:
2247 case X86_64_INTEGERSI_CLASS:
2250 case X86_64_SSE_CLASS:
2251 case X86_64_SSESF_CLASS:
2252 case X86_64_SSEDF_CLASS:
2255 case X86_64_NO_CLASS:
2256 case X86_64_SSEUP_CLASS:
2258 case X86_64_X87_CLASS:
2259 case X86_64_X87UP_CLASS:
2263 case X86_64_MEMORY_CLASS:
2268 /* Construct container for the argument used by GCC interface. See
2269 FUNCTION_ARG for the detailed description. */
2271 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
2272 enum machine_mode mode;
2275 int nintregs, nsseregs;
2279 enum machine_mode tmpmode;
2281 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2282 enum x86_64_reg_class class[MAX_CLASSES];
2286 int needed_sseregs, needed_intregs;
2287 rtx exp[MAX_CLASSES];
2290 n = classify_argument (mode, type, class, 0);
2291 if (TARGET_DEBUG_ARG)
2294 fprintf (stderr, "Memory class\n");
2297 fprintf (stderr, "Classes:");
2298 for (i = 0; i < n; i++)
2300 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2302 fprintf (stderr, "\n");
2307 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2309 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2312 /* First construct simple cases. Avoid SCmode, since we want to use
2313 single register to pass this type. */
2314 if (n == 1 && mode != SCmode)
2317 case X86_64_INTEGER_CLASS:
2318 case X86_64_INTEGERSI_CLASS:
2319 return gen_rtx_REG (mode, intreg[0]);
2320 case X86_64_SSE_CLASS:
2321 case X86_64_SSESF_CLASS:
2322 case X86_64_SSEDF_CLASS:
2323 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2324 case X86_64_X87_CLASS:
2325 return gen_rtx_REG (mode, FIRST_STACK_REG);
2326 case X86_64_NO_CLASS:
2327 /* Zero sized array, struct or class. */
2332 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
2333 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2335 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2336 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
2337 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2338 && class[1] == X86_64_INTEGER_CLASS
2339 && (mode == CDImode || mode == TImode)
2340 && intreg[0] + 1 == intreg[1])
2341 return gen_rtx_REG (mode, intreg[0]);
2343 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2344 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2345 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
2347 /* Otherwise figure out the entries of the PARALLEL. */
2348 for (i = 0; i < n; i++)
2352 case X86_64_NO_CLASS:
2354 case X86_64_INTEGER_CLASS:
2355 case X86_64_INTEGERSI_CLASS:
2356 /* Merge TImodes on aligned occasions here too. */
2357 if (i * 8 + 8 > bytes)
2358 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2359 else if (class[i] == X86_64_INTEGERSI_CLASS)
2363 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2364 if (tmpmode == BLKmode)
2366 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2367 gen_rtx_REG (tmpmode, *intreg),
2371 case X86_64_SSESF_CLASS:
2372 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2373 gen_rtx_REG (SFmode,
2374 SSE_REGNO (sse_regno)),
2378 case X86_64_SSEDF_CLASS:
2379 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2380 gen_rtx_REG (DFmode,
2381 SSE_REGNO (sse_regno)),
2385 case X86_64_SSE_CLASS:
2386 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2390 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2391 gen_rtx_REG (tmpmode,
2392 SSE_REGNO (sse_regno)),
2394 if (tmpmode == TImode)
2402 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2403 for (i = 0; i < nexps; i++)
2404 XVECEXP (ret, 0, i) = exp [i];
2408 /* Update the data in CUM to advance over an argument
2409 of mode MODE and data type TYPE.
2410 (TYPE is null for libcalls where that information may not be available.) */
2413 function_arg_advance (cum, mode, type, named)
2414 CUMULATIVE_ARGS *cum; /* current arg information */
2415 enum machine_mode mode; /* current arg mode */
2416 tree type; /* type of the argument or 0 if lib support */
2417 int named; /* whether or not the argument was named */
2420 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2421 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2423 if (TARGET_DEBUG_ARG)
2425 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2426 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2429 int int_nregs, sse_nregs;
2430 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2431 cum->words += words;
2432 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2434 cum->nregs -= int_nregs;
2435 cum->sse_nregs -= sse_nregs;
2436 cum->regno += int_nregs;
2437 cum->sse_regno += sse_nregs;
2440 cum->words += words;
2444 if (TARGET_SSE && mode == TImode)
2446 cum->sse_words += words;
2447 cum->sse_nregs -= 1;
2448 cum->sse_regno += 1;
2449 if (cum->sse_nregs <= 0)
2457 cum->words += words;
2458 cum->nregs -= words;
2459 cum->regno += words;
2461 if (cum->nregs <= 0)
2471 /* Define where to put the arguments to a function.
2472 Value is zero to push the argument on the stack,
2473 or a hard register in which to store the argument.
2475 MODE is the argument's machine mode.
2476 TYPE is the data type of the argument (as a tree).
2477 This is null for libcalls where that information may
2479 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2480 the preceding args and about the function being called.
2481 NAMED is nonzero if this argument is a named parameter
2482 (otherwise it is an extra parameter matching an ellipsis). */
2485 function_arg (cum, mode, type, named)
2486 CUMULATIVE_ARGS *cum; /* current arg information */
2487 enum machine_mode mode; /* current arg mode */
2488 tree type; /* type of the argument or 0 if lib support */
2489 int named; /* != 0 for normal args, == 0 for ... args */
2493 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2494 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2496 /* Handle a hidden AL argument containing number of registers for varargs
2497 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2499 if (mode == VOIDmode)
2502 return GEN_INT (cum->maybe_vaarg
2503 ? (cum->sse_nregs < 0
2511 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2512 &x86_64_int_parameter_registers [cum->regno],
2517 /* For now, pass fp/complex values on the stack. */
2529 if (words <= cum->nregs)
2531 int regno = cum->regno;
2533 /* Fastcall allocates the first two DWORD (SImode) or
2534 smaller arguments to ECX and EDX. */
2537 if (mode == BLKmode || mode == DImode)
2540 /* ECX not EAX is the first allocated register. */
2544 ret = gen_rtx_REG (mode, regno);
2549 ret = gen_rtx_REG (mode, cum->sse_regno);
2553 if (TARGET_DEBUG_ARG)
2556 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2557 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2560 print_simple_rtl (stderr, ret);
2562 fprintf (stderr, ", stack");
2564 fprintf (stderr, " )\n");
2570 /* A C expression that indicates when an argument must be passed by
2571 reference. If nonzero for an argument, a copy of that argument is
2572 made in memory and a pointer to the argument is passed instead of
2573 the argument itself. The pointer is passed in whatever way is
2574 appropriate for passing a pointer to that type. */
2577 function_arg_pass_by_reference (cum, mode, type, named)
2578 CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
2579 enum machine_mode mode ATTRIBUTE_UNUSED;
2581 int named ATTRIBUTE_UNUSED;
2586 if (type && int_size_in_bytes (type) == -1)
2588 if (TARGET_DEBUG_ARG)
2589 fprintf (stderr, "function_arg_pass_by_reference\n");
2596 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
2599 contains_128bit_aligned_vector_p (type)
2602 enum machine_mode mode = TYPE_MODE (type);
2603 if (SSE_REG_MODE_P (mode)
2604 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2606 if (TYPE_ALIGN (type) < 128)
2609 if (AGGREGATE_TYPE_P (type))
2611 /* Walk the agregates recursivly. */
2612 if (TREE_CODE (type) == RECORD_TYPE
2613 || TREE_CODE (type) == UNION_TYPE
2614 || TREE_CODE (type) == QUAL_UNION_TYPE)
2618 if (TYPE_BINFO (type) != NULL
2619 && TYPE_BINFO_BASETYPES (type) != NULL)
2621 tree bases = TYPE_BINFO_BASETYPES (type);
2622 int n_bases = TREE_VEC_LENGTH (bases);
2625 for (i = 0; i < n_bases; ++i)
2627 tree binfo = TREE_VEC_ELT (bases, i);
2628 tree type = BINFO_TYPE (binfo);
2630 if (contains_128bit_aligned_vector_p (type))
2634 /* And now merge the fields of structure. */
2635 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2637 if (TREE_CODE (field) == FIELD_DECL
2638 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2642 /* Just for use if some languages passes arrays by value. */
2643 else if (TREE_CODE (type) == ARRAY_TYPE)
2645 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2654 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2658 ix86_function_arg_boundary (mode, type)
2659 enum machine_mode mode;
2664 align = TYPE_ALIGN (type);
2666 align = GET_MODE_ALIGNMENT (mode);
2667 if (align < PARM_BOUNDARY)
2668 align = PARM_BOUNDARY;
2671 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
2672 make an exception for SSE modes since these require 128bit
2675 The handling here differs from field_alignment. ICC aligns MMX
2676 arguments to 4 byte boundaries, while structure fields are aligned
2677 to 8 byte boundaries. */
2680 if (!SSE_REG_MODE_P (mode))
2681 align = PARM_BOUNDARY;
2685 if (!contains_128bit_aligned_vector_p (type))
2686 align = PARM_BOUNDARY;
2688 if (align != PARM_BOUNDARY && !TARGET_SSE)
2696 /* Return true if N is a possible register number of function value. */
2698 ix86_function_value_regno_p (regno)
2703 return ((regno) == 0
2704 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2705 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2707 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2708 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2709 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2712 /* Define how to find the value returned by a function.
2713 VALTYPE is the data type of the value (as a tree).
2714 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2715 otherwise, FUNC is 0. */
2717 ix86_function_value (valtype)
2722 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2723 REGPARM_MAX, SSE_REGPARM_MAX,
2724 x86_64_int_return_registers, 0);
2725 /* For zero sized structures, construct_container return NULL, but we need
2726 to keep rest of compiler happy by returning meaningful value. */
2728 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2732 return gen_rtx_REG (TYPE_MODE (valtype),
2733 ix86_value_regno (TYPE_MODE (valtype)));
2736 /* Return false iff type is returned in memory. */
2738 ix86_return_in_memory (type)
2741 int needed_intregs, needed_sseregs;
2744 return !examine_argument (TYPE_MODE (type), type, 1,
2745 &needed_intregs, &needed_sseregs);
2749 if (TYPE_MODE (type) == BLKmode)
2751 else if (MS_AGGREGATE_RETURN
2752 && AGGREGATE_TYPE_P (type)
2753 && int_size_in_bytes(type) <= 8)
2755 else if ((VECTOR_MODE_P (TYPE_MODE (type))
2756 && int_size_in_bytes (type) == 8)
2757 || (int_size_in_bytes (type) > 12
2758 && TYPE_MODE (type) != TImode
2759 && TYPE_MODE (type) != TFmode
2760 && !VECTOR_MODE_P (TYPE_MODE (type))))
2766 /* Define how to find the value returned by a library function
2767 assuming the value has mode MODE. */
2769 ix86_libcall_value (mode)
2770 enum machine_mode mode;
2780 return gen_rtx_REG (mode, FIRST_SSE_REG);
2783 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2785 return gen_rtx_REG (mode, 0);
2789 return gen_rtx_REG (mode, ix86_value_regno (mode));
2792 /* Given a mode, return the register to use for a return value. */
2795 ix86_value_regno (mode)
2796 enum machine_mode mode;
2798 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2799 return FIRST_FLOAT_REG;
2800 if (mode == TImode || VECTOR_MODE_P (mode))
2801 return FIRST_SSE_REG;
2805 /* Create the va_list data type. */
2808 ix86_build_va_list ()
2810 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2812 /* For i386 we use plain pointer to argument area. */
2814 return build_pointer_type (char_type_node);
2816 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2817 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2819 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2820 unsigned_type_node);
2821 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2822 unsigned_type_node);
2823 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2825 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2828 DECL_FIELD_CONTEXT (f_gpr) = record;
2829 DECL_FIELD_CONTEXT (f_fpr) = record;
2830 DECL_FIELD_CONTEXT (f_ovf) = record;
2831 DECL_FIELD_CONTEXT (f_sav) = record;
2833 TREE_CHAIN (record) = type_decl;
2834 TYPE_NAME (record) = type_decl;
2835 TYPE_FIELDS (record) = f_gpr;
2836 TREE_CHAIN (f_gpr) = f_fpr;
2837 TREE_CHAIN (f_fpr) = f_ovf;
2838 TREE_CHAIN (f_ovf) = f_sav;
2840 layout_type (record);
2842 /* The correct type is an array type of one element. */
2843 return build_array_type (record, build_index_type (size_zero_node));
2846 /* Perform any needed actions needed for a function that is receiving a
2847 variable number of arguments.
2851 MODE and TYPE are the mode and type of the current parameter.
2853 PRETEND_SIZE is a variable that should be set to the amount of stack
2854 that must be pushed by the prolog to pretend that our caller pushed
2857 Normally, this macro will push all remaining incoming registers on the
2858 stack and set PRETEND_SIZE to the length of the registers pushed. */
2861 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2862 CUMULATIVE_ARGS *cum;
2863 enum machine_mode mode;
2865 int *pretend_size ATTRIBUTE_UNUSED;
2869 CUMULATIVE_ARGS next_cum;
2870 rtx save_area = NULL_RTX, mem;
2883 /* Indicate to allocate space on the stack for varargs save area. */
2884 ix86_save_varrargs_registers = 1;
2886 cfun->stack_alignment_needed = 128;
2888 fntype = TREE_TYPE (current_function_decl);
2889 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2890 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2891 != void_type_node));
2893 /* For varargs, we do not want to skip the dummy va_dcl argument.
2894 For stdargs, we do want to skip the last named argument. */
2897 function_arg_advance (&next_cum, mode, type, 1);
2900 save_area = frame_pointer_rtx;
2902 set = get_varargs_alias_set ();
2904 for (i = next_cum.regno; i < ix86_regparm; i++)
2906 mem = gen_rtx_MEM (Pmode,
2907 plus_constant (save_area, i * UNITS_PER_WORD));
2908 set_mem_alias_set (mem, set);
2909 emit_move_insn (mem, gen_rtx_REG (Pmode,
2910 x86_64_int_parameter_registers[i]));
2913 if (next_cum.sse_nregs)
2915 /* Now emit code to save SSE registers. The AX parameter contains number
2916 of SSE parameter registers used to call this function. We use
2917 sse_prologue_save insn template that produces computed jump across
2918 SSE saves. We need some preparation work to get this working. */
2920 label = gen_label_rtx ();
2921 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2923 /* Compute address to jump to :
2924 label - 5*eax + nnamed_sse_arguments*5 */
2925 tmp_reg = gen_reg_rtx (Pmode);
2926 nsse_reg = gen_reg_rtx (Pmode);
2927 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2928 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2929 gen_rtx_MULT (Pmode, nsse_reg,
2931 if (next_cum.sse_regno)
2934 gen_rtx_CONST (DImode,
2935 gen_rtx_PLUS (DImode,
2937 GEN_INT (next_cum.sse_regno * 4))));
2939 emit_move_insn (nsse_reg, label_ref);
2940 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2942 /* Compute address of memory block we save into. We always use pointer
2943 pointing 127 bytes after first byte to store - this is needed to keep
2944 instruction size limited by 4 bytes. */
2945 tmp_reg = gen_reg_rtx (Pmode);
2946 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2947 plus_constant (save_area,
2948 8 * REGPARM_MAX + 127)));
2949 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2950 set_mem_alias_set (mem, set);
2951 set_mem_align (mem, BITS_PER_WORD);
2953 /* And finally do the dirty job! */
2954 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2955 GEN_INT (next_cum.sse_regno), label));
2960 /* Implement va_start. */
2963 ix86_va_start (valist, nextarg)
2967 HOST_WIDE_INT words, n_gpr, n_fpr;
2968 tree f_gpr, f_fpr, f_ovf, f_sav;
2969 tree gpr, fpr, ovf, sav, t;
2971 /* Only 64bit target needs something special. */
2974 std_expand_builtin_va_start (valist, nextarg);
2978 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2979 f_fpr = TREE_CHAIN (f_gpr);
2980 f_ovf = TREE_CHAIN (f_fpr);
2981 f_sav = TREE_CHAIN (f_ovf);
2983 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2984 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2985 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2986 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2987 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2989 /* Count number of gp and fp argument registers used. */
2990 words = current_function_args_info.words;
2991 n_gpr = current_function_args_info.regno;
2992 n_fpr = current_function_args_info.sse_regno;
2994 if (TARGET_DEBUG_ARG)
2995 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2996 (int) words, (int) n_gpr, (int) n_fpr);
2998 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2999 build_int_2 (n_gpr * 8, 0));
3000 TREE_SIDE_EFFECTS (t) = 1;
3001 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3003 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3004 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
3005 TREE_SIDE_EFFECTS (t) = 1;
3006 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3008 /* Find the overflow area. */
3009 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3011 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3012 build_int_2 (words * UNITS_PER_WORD, 0));
3013 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3014 TREE_SIDE_EFFECTS (t) = 1;
3015 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3017 /* Find the register save area.
3018 Prologue of the function save it right above stack frame. */
3019 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3020 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3021 TREE_SIDE_EFFECTS (t) = 1;
3022 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3025 /* Implement va_arg. */
3027 ix86_va_arg (valist, type)
3030 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3031 tree f_gpr, f_fpr, f_ovf, f_sav;
3032 tree gpr, fpr, ovf, sav, t;
3034 rtx lab_false, lab_over = NULL_RTX;
3039 /* Only 64bit target needs something special. */
3042 return std_expand_builtin_va_arg (valist, type);
3045 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3046 f_fpr = TREE_CHAIN (f_gpr);
3047 f_ovf = TREE_CHAIN (f_fpr);
3048 f_sav = TREE_CHAIN (f_ovf);
3050 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3051 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3052 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3053 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3054 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3056 size = int_size_in_bytes (type);
3059 /* Passed by reference. */
3061 type = build_pointer_type (type);
3062 size = int_size_in_bytes (type);
3064 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3066 container = construct_container (TYPE_MODE (type), type, 0,
3067 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3069 * Pull the value out of the saved registers ...
3072 addr_rtx = gen_reg_rtx (Pmode);
3076 rtx int_addr_rtx, sse_addr_rtx;
3077 int needed_intregs, needed_sseregs;
3080 lab_over = gen_label_rtx ();
3081 lab_false = gen_label_rtx ();
3083 examine_argument (TYPE_MODE (type), type, 0,
3084 &needed_intregs, &needed_sseregs);
3087 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
3088 || TYPE_ALIGN (type) > 128);
3090 /* In case we are passing structure, verify that it is consecutive block
3091 on the register save area. If not we need to do moves. */
3092 if (!need_temp && !REG_P (container))
3094 /* Verify that all registers are strictly consecutive */
3095 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3099 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3101 rtx slot = XVECEXP (container, 0, i);
3102 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3103 || INTVAL (XEXP (slot, 1)) != i * 16)
3111 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3113 rtx slot = XVECEXP (container, 0, i);
3114 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3115 || INTVAL (XEXP (slot, 1)) != i * 8)
3122 int_addr_rtx = addr_rtx;
3123 sse_addr_rtx = addr_rtx;
3127 int_addr_rtx = gen_reg_rtx (Pmode);
3128 sse_addr_rtx = gen_reg_rtx (Pmode);
3130 /* First ensure that we fit completely in registers. */
3133 emit_cmp_and_jump_insns (expand_expr
3134 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
3135 GEN_INT ((REGPARM_MAX - needed_intregs +
3136 1) * 8), GE, const1_rtx, SImode,
3141 emit_cmp_and_jump_insns (expand_expr
3142 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
3143 GEN_INT ((SSE_REGPARM_MAX -
3144 needed_sseregs + 1) * 16 +
3145 REGPARM_MAX * 8), GE, const1_rtx,
3146 SImode, 1, lab_false);
3149 /* Compute index to start of area used for integer regs. */
3152 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
3153 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
3154 if (r != int_addr_rtx)
3155 emit_move_insn (int_addr_rtx, r);
3159 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
3160 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
3161 if (r != sse_addr_rtx)
3162 emit_move_insn (sse_addr_rtx, r);
3170 /* Never use the memory itself, as it has the alias set. */
3171 x = XEXP (assign_temp (type, 0, 1, 0), 0);
3172 mem = gen_rtx_MEM (BLKmode, x);
3173 force_operand (x, addr_rtx);
3174 set_mem_alias_set (mem, get_varargs_alias_set ());
3175 set_mem_align (mem, BITS_PER_UNIT);
3177 for (i = 0; i < XVECLEN (container, 0); i++)
3179 rtx slot = XVECEXP (container, 0, i);
3180 rtx reg = XEXP (slot, 0);
3181 enum machine_mode mode = GET_MODE (reg);
3187 if (SSE_REGNO_P (REGNO (reg)))
3189 src_addr = sse_addr_rtx;
3190 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3194 src_addr = int_addr_rtx;
3195 src_offset = REGNO (reg) * 8;
3197 src_mem = gen_rtx_MEM (mode, src_addr);
3198 set_mem_alias_set (src_mem, get_varargs_alias_set ());
3199 src_mem = adjust_address (src_mem, mode, src_offset);
3200 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
3201 emit_move_insn (dest_mem, src_mem);
3208 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3209 build_int_2 (needed_intregs * 8, 0));
3210 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3211 TREE_SIDE_EFFECTS (t) = 1;
3212 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3217 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3218 build_int_2 (needed_sseregs * 16, 0));
3219 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3220 TREE_SIDE_EFFECTS (t) = 1;
3221 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3224 emit_jump_insn (gen_jump (lab_over));
3226 emit_label (lab_false);
3229 /* ... otherwise out of the overflow area. */
3231 /* Care for on-stack alignment if needed. */
3232 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3236 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3237 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3238 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3242 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3244 emit_move_insn (addr_rtx, r);
3247 build (PLUS_EXPR, TREE_TYPE (t), t,
3248 build_int_2 (rsize * UNITS_PER_WORD, 0));
3249 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3250 TREE_SIDE_EFFECTS (t) = 1;
3251 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3254 emit_label (lab_over);
3258 r = gen_rtx_MEM (Pmode, addr_rtx);
3259 set_mem_alias_set (r, get_varargs_alias_set ());
3260 emit_move_insn (addr_rtx, r);
3266 /* Return nonzero if OP is either a i387 or SSE fp register. */
3268 any_fp_register_operand (op, mode)
3270 enum machine_mode mode ATTRIBUTE_UNUSED;
3272 return ANY_FP_REG_P (op);
3275 /* Return nonzero if OP is an i387 fp register. */
3277 fp_register_operand (op, mode)
3279 enum machine_mode mode ATTRIBUTE_UNUSED;
3281 return FP_REG_P (op);
3284 /* Return nonzero if OP is a non-fp register_operand. */
3286 register_and_not_any_fp_reg_operand (op, mode)
3288 enum machine_mode mode;
3290 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3293 /* Return nonzero if OP is a register operand other than an
3294 i387 fp register. */
3296 register_and_not_fp_reg_operand (op, mode)
3298 enum machine_mode mode;
3300 return register_operand (op, mode) && !FP_REG_P (op);
3303 /* Return nonzero if OP is general operand representable on x86_64. */
3306 x86_64_general_operand (op, mode)
3308 enum machine_mode mode;
3311 return general_operand (op, mode);
3312 if (nonimmediate_operand (op, mode))
3314 return x86_64_sign_extended_value (op);
3317 /* Return nonzero if OP is general operand representable on x86_64
3318 as either sign extended or zero extended constant. */
3321 x86_64_szext_general_operand (op, mode)
3323 enum machine_mode mode;
3326 return general_operand (op, mode);
3327 if (nonimmediate_operand (op, mode))
3329 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3332 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3335 x86_64_nonmemory_operand (op, mode)
3337 enum machine_mode mode;
3340 return nonmemory_operand (op, mode);
3341 if (register_operand (op, mode))
3343 return x86_64_sign_extended_value (op);
3346 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
3349 x86_64_movabs_operand (op, mode)
3351 enum machine_mode mode;
3353 if (!TARGET_64BIT || !flag_pic)
3354 return nonmemory_operand (op, mode);
3355 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3357 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3362 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3365 x86_64_szext_nonmemory_operand (op, mode)
3367 enum machine_mode mode;
3370 return nonmemory_operand (op, mode);
3371 if (register_operand (op, mode))
3373 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3376 /* Return nonzero if OP is immediate operand representable on x86_64. */
3379 x86_64_immediate_operand (op, mode)
3381 enum machine_mode mode;
3384 return immediate_operand (op, mode);
3385 return x86_64_sign_extended_value (op);
3388 /* Return nonzero if OP is immediate operand representable on x86_64. */
3391 x86_64_zext_immediate_operand (op, mode)
3393 enum machine_mode mode ATTRIBUTE_UNUSED;
3395 return x86_64_zero_extended_value (op);
3398 /* Return nonzero if OP is (const_int 1), else return zero. */
3401 const_int_1_operand (op, mode)
3403 enum machine_mode mode ATTRIBUTE_UNUSED;
3405 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
3408 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3409 for shift & compare patterns, as shifting by 0 does not change flags),
3410 else return zero. */
3413 const_int_1_31_operand (op, mode)
3415 enum machine_mode mode ATTRIBUTE_UNUSED;
3417 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3420 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3421 reference and a constant. */
3424 symbolic_operand (op, mode)
3426 enum machine_mode mode ATTRIBUTE_UNUSED;
3428 switch (GET_CODE (op))
3436 if (GET_CODE (op) == SYMBOL_REF
3437 || GET_CODE (op) == LABEL_REF
3438 || (GET_CODE (op) == UNSPEC
3439 && (XINT (op, 1) == UNSPEC_GOT
3440 || XINT (op, 1) == UNSPEC_GOTOFF
3441 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3443 if (GET_CODE (op) != PLUS
3444 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3448 if (GET_CODE (op) == SYMBOL_REF
3449 || GET_CODE (op) == LABEL_REF)
3451 /* Only @GOTOFF gets offsets. */
3452 if (GET_CODE (op) != UNSPEC
3453 || XINT (op, 1) != UNSPEC_GOTOFF)
3456 op = XVECEXP (op, 0, 0);
3457 if (GET_CODE (op) == SYMBOL_REF
3458 || GET_CODE (op) == LABEL_REF)
3467 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3470 pic_symbolic_operand (op, mode)
3472 enum machine_mode mode ATTRIBUTE_UNUSED;
3474 if (GET_CODE (op) != CONST)
3479 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
3484 if (GET_CODE (op) == UNSPEC)
3486 if (GET_CODE (op) != PLUS
3487 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3490 if (GET_CODE (op) == UNSPEC)
3496 /* Return true if OP is a symbolic operand that resolves locally. */
3499 local_symbolic_operand (op, mode)
3501 enum machine_mode mode ATTRIBUTE_UNUSED;
3503 if (GET_CODE (op) == CONST
3504 && GET_CODE (XEXP (op, 0)) == PLUS
3505 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3506 op = XEXP (XEXP (op, 0), 0);
3508 if (GET_CODE (op) == LABEL_REF)
3511 if (GET_CODE (op) != SYMBOL_REF)
3514 if (SYMBOL_REF_LOCAL_P (op))
3517 /* There is, however, a not insubstantial body of code in the rest of
3518 the compiler that assumes it can just stick the results of
3519 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3520 /* ??? This is a hack. Should update the body of the compiler to
3521 always create a DECL an invoke targetm.encode_section_info. */
3522 if (strncmp (XSTR (op, 0), internal_label_prefix,
3523 internal_label_prefix_len) == 0)
3529 /* Test for various thread-local symbols. */
3532 tls_symbolic_operand (op, mode)
3534 enum machine_mode mode ATTRIBUTE_UNUSED;
3536 if (GET_CODE (op) != SYMBOL_REF)
3538 return SYMBOL_REF_TLS_MODEL (op);
3542 tls_symbolic_operand_1 (op, kind)
3544 enum tls_model kind;
3546 if (GET_CODE (op) != SYMBOL_REF)
3548 return SYMBOL_REF_TLS_MODEL (op) == kind;
3552 global_dynamic_symbolic_operand (op, mode)
3554 enum machine_mode mode ATTRIBUTE_UNUSED;
3556 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3560 local_dynamic_symbolic_operand (op, mode)
3562 enum machine_mode mode ATTRIBUTE_UNUSED;
3564 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3568 initial_exec_symbolic_operand (op, mode)
3570 enum machine_mode mode ATTRIBUTE_UNUSED;
3572 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3576 local_exec_symbolic_operand (op, mode)
3578 enum machine_mode mode ATTRIBUTE_UNUSED;
3580 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3583 /* Test for a valid operand for a call instruction. Don't allow the
3584 arg pointer register or virtual regs since they may decay into
3585 reg + const, which the patterns can't handle. */
3588 call_insn_operand (op, mode)
3590 enum machine_mode mode ATTRIBUTE_UNUSED;
3592 /* Disallow indirect through a virtual register. This leads to
3593 compiler aborts when trying to eliminate them. */
3594 if (GET_CODE (op) == REG
3595 && (op == arg_pointer_rtx
3596 || op == frame_pointer_rtx
3597 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3598 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3601 /* Disallow `call 1234'. Due to varying assembler lameness this
3602 gets either rejected or translated to `call .+1234'. */
3603 if (GET_CODE (op) == CONST_INT)
3606 /* Explicitly allow SYMBOL_REF even if pic. */
3607 if (GET_CODE (op) == SYMBOL_REF)
3610 /* Otherwise we can allow any general_operand in the address. */
3611 return general_operand (op, Pmode);
3614 /* Test for a valid operand for a call instruction. Don't allow the
3615 arg pointer register or virtual regs since they may decay into
3616 reg + const, which the patterns can't handle. */
3619 sibcall_insn_operand (op, mode)
3621 enum machine_mode mode ATTRIBUTE_UNUSED;
3623 /* Disallow indirect through a virtual register. This leads to
3624 compiler aborts when trying to eliminate them. */
3625 if (GET_CODE (op) == REG
3626 && (op == arg_pointer_rtx
3627 || op == frame_pointer_rtx
3628 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3629 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3632 /* Explicitly allow SYMBOL_REF even if pic. */
3633 if (GET_CODE (op) == SYMBOL_REF)
3636 /* Otherwise we can only allow register operands. */
3637 return register_operand (op, Pmode);
3641 constant_call_address_operand (op, mode)
3643 enum machine_mode mode ATTRIBUTE_UNUSED;
3645 if (GET_CODE (op) == CONST
3646 && GET_CODE (XEXP (op, 0)) == PLUS
3647 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3648 op = XEXP (XEXP (op, 0), 0);
3649 return GET_CODE (op) == SYMBOL_REF;
3652 /* Match exactly zero and one. */
3655 const0_operand (op, mode)
3657 enum machine_mode mode;
3659 return op == CONST0_RTX (mode);
3663 const1_operand (op, mode)
3665 enum machine_mode mode ATTRIBUTE_UNUSED;
3667 return op == const1_rtx;
3670 /* Match 2, 4, or 8. Used for leal multiplicands. */
3673 const248_operand (op, mode)
3675 enum machine_mode mode ATTRIBUTE_UNUSED;
3677 return (GET_CODE (op) == CONST_INT
3678 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3681 /* True if this is a constant appropriate for an increment or decrement. */
3684 incdec_operand (op, mode)
3686 enum machine_mode mode ATTRIBUTE_UNUSED;
3688 /* On Pentium4, the inc and dec operations causes extra dependency on flag
3689 registers, since carry flag is not set. */
3690 if (TARGET_PENTIUM4 && !optimize_size)
3692 return op == const1_rtx || op == constm1_rtx;
3695 /* Return nonzero if OP is acceptable as operand of DImode shift
3699 shiftdi_operand (op, mode)
3701 enum machine_mode mode ATTRIBUTE_UNUSED;
3704 return nonimmediate_operand (op, mode);
3706 return register_operand (op, mode);
3709 /* Return false if this is the stack pointer, or any other fake
3710 register eliminable to the stack pointer. Otherwise, this is
3713 This is used to prevent esp from being used as an index reg.
3714 Which would only happen in pathological cases. */
3717 reg_no_sp_operand (op, mode)
3719 enum machine_mode mode;
3722 if (GET_CODE (t) == SUBREG)
3724 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3727 return register_operand (op, mode);
3731 mmx_reg_operand (op, mode)
3733 enum machine_mode mode ATTRIBUTE_UNUSED;
3735 return MMX_REG_P (op);
3738 /* Return false if this is any eliminable register. Otherwise
3742 general_no_elim_operand (op, mode)
3744 enum machine_mode mode;
3747 if (GET_CODE (t) == SUBREG)
3749 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3750 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3751 || t == virtual_stack_dynamic_rtx)
3754 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3755 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3758 return general_operand (op, mode);
3761 /* Return false if this is any eliminable register. Otherwise
3762 register_operand or const_int. */
3765 nonmemory_no_elim_operand (op, mode)
3767 enum machine_mode mode;
3770 if (GET_CODE (t) == SUBREG)
3772 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3773 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3774 || t == virtual_stack_dynamic_rtx)
3777 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3780 /* Return false if this is any eliminable register or stack register,
3781 otherwise work like register_operand. */
3784 index_register_operand (op, mode)
3786 enum machine_mode mode;
3789 if (GET_CODE (t) == SUBREG)
3793 if (t == arg_pointer_rtx
3794 || t == frame_pointer_rtx
3795 || t == virtual_incoming_args_rtx
3796 || t == virtual_stack_vars_rtx
3797 || t == virtual_stack_dynamic_rtx
3798 || REGNO (t) == STACK_POINTER_REGNUM)
3801 return general_operand (op, mode);
3804 /* Return true if op is a Q_REGS class register. */
3807 q_regs_operand (op, mode)
3809 enum machine_mode mode;
3811 if (mode != VOIDmode && GET_MODE (op) != mode)
3813 if (GET_CODE (op) == SUBREG)
3814 op = SUBREG_REG (op);
3815 return ANY_QI_REG_P (op);
3818 /* Return true if op is an flags register. */
3821 flags_reg_operand (op, mode)
3823 enum machine_mode mode;
3825 if (mode != VOIDmode && GET_MODE (op) != mode)
3827 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3830 /* Return true if op is a NON_Q_REGS class register. */
3833 non_q_regs_operand (op, mode)
3835 enum machine_mode mode;
3837 if (mode != VOIDmode && GET_MODE (op) != mode)
3839 if (GET_CODE (op) == SUBREG)
3840 op = SUBREG_REG (op);
3841 return NON_QI_REG_P (op);
3845 zero_extended_scalar_load_operand (op, mode)
3847 enum machine_mode mode ATTRIBUTE_UNUSED;
3850 if (GET_CODE (op) != MEM)
3852 op = maybe_get_pool_constant (op);
3855 if (GET_CODE (op) != CONST_VECTOR)
3858 (GET_MODE_SIZE (GET_MODE (op)) /
3859 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
3860 for (n_elts--; n_elts > 0; n_elts--)
3862 rtx elt = CONST_VECTOR_ELT (op, n_elts);
3863 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
3869 /* Return 1 when OP is operand acceptable for standard SSE move. */
3871 vector_move_operand (op, mode)
3873 enum machine_mode mode;
3875 if (nonimmediate_operand (op, mode))
3877 if (GET_MODE (op) != mode && mode != VOIDmode)
3879 return (op == CONST0_RTX (GET_MODE (op)));
3882 /* Return true if op if a valid address, and does not contain
3883 a segment override. */
3886 no_seg_address_operand (op, mode)
3888 enum machine_mode mode;
3890 struct ix86_address parts;
3892 if (! address_operand (op, mode))
3895 if (! ix86_decompose_address (op, &parts))
3898 return parts.seg == SEG_DEFAULT;
3901 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3904 sse_comparison_operator (op, mode)
3906 enum machine_mode mode ATTRIBUTE_UNUSED;
3908 enum rtx_code code = GET_CODE (op);
3911 /* Operations supported directly. */
3921 /* These are equivalent to ones above in non-IEEE comparisons. */
3928 return !TARGET_IEEE_FP;
3933 /* Return 1 if OP is a valid comparison operator in valid mode. */
3935 ix86_comparison_operator (op, mode)
3937 enum machine_mode mode;
3939 enum machine_mode inmode;
3940 enum rtx_code code = GET_CODE (op);
3941 if (mode != VOIDmode && GET_MODE (op) != mode)
3943 if (GET_RTX_CLASS (code) != '<')
3945 inmode = GET_MODE (XEXP (op, 0));
3947 if (inmode == CCFPmode || inmode == CCFPUmode)
3949 enum rtx_code second_code, bypass_code;
3950 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3951 return (bypass_code == NIL && second_code == NIL);
3958 if (inmode == CCmode || inmode == CCGCmode
3959 || inmode == CCGOCmode || inmode == CCNOmode)
3962 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3963 if (inmode == CCmode)
3967 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3975 /* Return 1 if OP is a valid comparison operator testing carry flag
3978 ix86_carry_flag_operator (op, mode)
3980 enum machine_mode mode;
3982 enum machine_mode inmode;
3983 enum rtx_code code = GET_CODE (op);
3985 if (mode != VOIDmode && GET_MODE (op) != mode)
3987 if (GET_RTX_CLASS (code) != '<')
3989 inmode = GET_MODE (XEXP (op, 0));
3990 if (GET_CODE (XEXP (op, 0)) != REG
3991 || REGNO (XEXP (op, 0)) != 17
3992 || XEXP (op, 1) != const0_rtx)
3995 if (inmode == CCFPmode || inmode == CCFPUmode)
3997 enum rtx_code second_code, bypass_code;
3999 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4000 if (bypass_code != NIL || second_code != NIL)
4002 code = ix86_fp_compare_code_to_integer (code);
4004 else if (inmode != CCmode)
4009 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
4012 fcmov_comparison_operator (op, mode)
4014 enum machine_mode mode;
4016 enum machine_mode inmode;
4017 enum rtx_code code = GET_CODE (op);
4019 if (mode != VOIDmode && GET_MODE (op) != mode)
4021 if (GET_RTX_CLASS (code) != '<')
4023 inmode = GET_MODE (XEXP (op, 0));
4024 if (inmode == CCFPmode || inmode == CCFPUmode)
4026 enum rtx_code second_code, bypass_code;
4028 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4029 if (bypass_code != NIL || second_code != NIL)
4031 code = ix86_fp_compare_code_to_integer (code);
4033 /* i387 supports just limited amount of conditional codes. */
4036 case LTU: case GTU: case LEU: case GEU:
4037 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
4040 case ORDERED: case UNORDERED:
4048 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
4051 promotable_binary_operator (op, mode)
4053 enum machine_mode mode ATTRIBUTE_UNUSED;
4055 switch (GET_CODE (op))
4058 /* Modern CPUs have same latency for HImode and SImode multiply,
4059 but 386 and 486 do HImode multiply faster. */
4060 return ix86_tune > PROCESSOR_I486;
4072 /* Nearly general operand, but accept any const_double, since we wish
4073 to be able to drop them into memory rather than have them get pulled
4077 cmp_fp_expander_operand (op, mode)
4079 enum machine_mode mode;
4081 if (mode != VOIDmode && mode != GET_MODE (op))
4083 if (GET_CODE (op) == CONST_DOUBLE)
4085 return general_operand (op, mode);
4088 /* Match an SI or HImode register for a zero_extract. */
4091 ext_register_operand (op, mode)
4093 enum machine_mode mode ATTRIBUTE_UNUSED;
4096 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
4097 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
4100 if (!register_operand (op, VOIDmode))
4103 /* Be careful to accept only registers having upper parts. */
4104 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
4105 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
4108 /* Return 1 if this is a valid binary floating-point operation.
4109 OP is the expression matched, and MODE is its mode. */
4112 binary_fp_operator (op, mode)
4114 enum machine_mode mode;
4116 if (mode != VOIDmode && mode != GET_MODE (op))
4119 switch (GET_CODE (op))
4125 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
4133 mult_operator (op, mode)
4135 enum machine_mode mode ATTRIBUTE_UNUSED;
4137 return GET_CODE (op) == MULT;
4141 div_operator (op, mode)
4143 enum machine_mode mode ATTRIBUTE_UNUSED;
4145 return GET_CODE (op) == DIV;
4149 arith_or_logical_operator (op, mode)
4151 enum machine_mode mode;
4153 return ((mode == VOIDmode || GET_MODE (op) == mode)
4154 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
4155 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
4158 /* Returns 1 if OP is memory operand with a displacement. */
4161 memory_displacement_operand (op, mode)
4163 enum machine_mode mode;
4165 struct ix86_address parts;
4167 if (! memory_operand (op, mode))
4170 if (! ix86_decompose_address (XEXP (op, 0), &parts))
4173 return parts.disp != NULL_RTX;
4176 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
4177 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4179 ??? It seems likely that this will only work because cmpsi is an
4180 expander, and no actual insns use this. */
4183 cmpsi_operand (op, mode)
4185 enum machine_mode mode;
4187 if (nonimmediate_operand (op, mode))
4190 if (GET_CODE (op) == AND
4191 && GET_MODE (op) == SImode
4192 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
4193 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
4194 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
4195 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
4196 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
4197 && GET_CODE (XEXP (op, 1)) == CONST_INT)
4203 /* Returns 1 if OP is memory operand that can not be represented by the
4207 long_memory_operand (op, mode)
4209 enum machine_mode mode;
4211 if (! memory_operand (op, mode))
4214 return memory_address_length (op) != 0;
4217 /* Return nonzero if the rtx is known aligned. */
4220 aligned_operand (op, mode)
4222 enum machine_mode mode;
4224 struct ix86_address parts;
4226 if (!general_operand (op, mode))
4229 /* Registers and immediate operands are always "aligned". */
4230 if (GET_CODE (op) != MEM)
4233 /* Don't even try to do any aligned optimizations with volatiles. */
4234 if (MEM_VOLATILE_P (op))
4239 /* Pushes and pops are only valid on the stack pointer. */
4240 if (GET_CODE (op) == PRE_DEC
4241 || GET_CODE (op) == POST_INC)
4244 /* Decode the address. */
4245 if (! ix86_decompose_address (op, &parts))
4248 if (parts.base && GET_CODE (parts.base) == SUBREG)
4249 parts.base = SUBREG_REG (parts.base);
4250 if (parts.index && GET_CODE (parts.index) == SUBREG)
4251 parts.index = SUBREG_REG (parts.index);
4253 /* Look for some component that isn't known to be aligned. */
4257 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
4262 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
4267 if (GET_CODE (parts.disp) != CONST_INT
4268 || (INTVAL (parts.disp) & 3) != 0)
4272 /* Didn't find one -- this must be an aligned address. */
4276 /* Initialize the table of extra 80387 mathematical constants. */
4279 init_ext_80387_constants ()
4281 static const char * cst[5] =
4283 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4284 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4285 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4286 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4287 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4291 for (i = 0; i < 5; i++)
4293 real_from_string (&ext_80387_constants_table[i], cst[i]);
4294 /* Ensure each constant is rounded to XFmode precision. */
4295 real_convert (&ext_80387_constants_table[i],
4296 TARGET_128BIT_LONG_DOUBLE ? TFmode : XFmode,
4297 &ext_80387_constants_table[i]);
4300 ext_80387_constants_init = 1;
4303 /* Return true if the constant is something that can be loaded with
4304 a special instruction. */
4307 standard_80387_constant_p (x)
4310 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4313 if (x == CONST0_RTX (GET_MODE (x)))
4315 if (x == CONST1_RTX (GET_MODE (x)))
4318 /* For XFmode constants, try to find a special 80387 instruction on
4319 those CPUs that benefit from them. */
4320 if ((GET_MODE (x) == XFmode || GET_MODE (x) == TFmode)
4321 && x86_ext_80387_constants & TUNEMASK)
4326 if (! ext_80387_constants_init)
4327 init_ext_80387_constants ();
4329 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4330 for (i = 0; i < 5; i++)
4331 if (real_identical (&r, &ext_80387_constants_table[i]))
4338 /* Return the opcode of the special instruction to be used to load
4342 standard_80387_constant_opcode (x)
4345 switch (standard_80387_constant_p (x))
4365 /* Return the CONST_DOUBLE representing the 80387 constant that is
4366 loaded by the specified special instruction. The argument IDX
4367 matches the return value from standard_80387_constant_p. */
4370 standard_80387_constant_rtx (idx)
4375 if (! ext_80387_constants_init)
4376 init_ext_80387_constants ();
4392 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4393 TARGET_128BIT_LONG_DOUBLE ? TFmode : XFmode);
4396 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4399 standard_sse_constant_p (x)
4402 if (x == const0_rtx)
4404 return (x == CONST0_RTX (GET_MODE (x)));
4407 /* Returns 1 if OP contains a symbol reference */
4410 symbolic_reference_mentioned_p (op)
4413 register const char *fmt;
4416 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4419 fmt = GET_RTX_FORMAT (GET_CODE (op));
4420 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4426 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4427 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4431 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4438 /* Return 1 if it is appropriate to emit `ret' instructions in the
4439 body of a function. Do this only if the epilogue is simple, needing a
4440 couple of insns. Prior to reloading, we can't tell how many registers
4441 must be saved, so return 0 then. Return 0 if there is no frame
4442 marker to de-allocate.
4444 If NON_SAVING_SETJMP is defined and true, then it is not possible
4445 for the epilogue to be simple, so return 0. This is a special case
4446 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4447 until final, but jump_optimize may need to know sooner if a
4451 ix86_can_use_return_insn_p ()
4453 struct ix86_frame frame;
4455 #ifdef NON_SAVING_SETJMP
4456 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4460 if (! reload_completed || frame_pointer_needed)
4463 /* Don't allow more than 32 pop, since that's all we can do
4464 with one instruction. */
4465 if (current_function_pops_args
4466 && current_function_args_size >= 32768)
4469 ix86_compute_frame_layout (&frame);
4470 return frame.to_allocate == 0 && frame.nregs == 0;
4473 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
4475 x86_64_sign_extended_value (value)
4478 switch (GET_CODE (value))
4480 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
4481 to be at least 32 and this all acceptable constants are
4482 represented as CONST_INT. */
4484 if (HOST_BITS_PER_WIDE_INT == 32)
4488 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4489 return trunc_int_for_mode (val, SImode) == val;
4493 /* For certain code models, the symbolic references are known to fit.
4494 in CM_SMALL_PIC model we know it fits if it is local to the shared
4495 library. Don't count TLS SYMBOL_REFs here, since they should fit
4496 only if inside of UNSPEC handled below. */
4498 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4500 /* For certain code models, the code is near as well. */
4502 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4503 || ix86_cmodel == CM_KERNEL);
4505 /* We also may accept the offsetted memory references in certain special
4508 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4509 switch (XINT (XEXP (value, 0), 1))
4511 case UNSPEC_GOTPCREL:
4513 case UNSPEC_GOTNTPOFF:
4519 if (GET_CODE (XEXP (value, 0)) == PLUS)
4521 rtx op1 = XEXP (XEXP (value, 0), 0);
4522 rtx op2 = XEXP (XEXP (value, 0), 1);
4523 HOST_WIDE_INT offset;
4525 if (ix86_cmodel == CM_LARGE)
4527 if (GET_CODE (op2) != CONST_INT)
4529 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4530 switch (GET_CODE (op1))
4533 /* For CM_SMALL assume that latest object is 16MB before
4534 end of 31bits boundary. We may also accept pretty
4535 large negative constants knowing that all objects are
4536 in the positive half of address space. */
4537 if (ix86_cmodel == CM_SMALL
4538 && offset < 16*1024*1024
4539 && trunc_int_for_mode (offset, SImode) == offset)
4541 /* For CM_KERNEL we know that all object resist in the
4542 negative half of 32bits address space. We may not
4543 accept negative offsets, since they may be just off
4544 and we may accept pretty large positive ones. */
4545 if (ix86_cmodel == CM_KERNEL
4547 && trunc_int_for_mode (offset, SImode) == offset)
4551 /* These conditions are similar to SYMBOL_REF ones, just the
4552 constraints for code models differ. */
4553 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4554 && offset < 16*1024*1024
4555 && trunc_int_for_mode (offset, SImode) == offset)
4557 if (ix86_cmodel == CM_KERNEL
4559 && trunc_int_for_mode (offset, SImode) == offset)
4563 switch (XINT (op1, 1))
4568 && trunc_int_for_mode (offset, SImode) == offset)
4582 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
4584 x86_64_zero_extended_value (value)
4587 switch (GET_CODE (value))
4590 if (HOST_BITS_PER_WIDE_INT == 32)
4591 return (GET_MODE (value) == VOIDmode
4592 && !CONST_DOUBLE_HIGH (value));
4596 if (HOST_BITS_PER_WIDE_INT == 32)
4597 return INTVAL (value) >= 0;
4599 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4602 /* For certain code models, the symbolic references are known to fit. */
4604 return ix86_cmodel == CM_SMALL;
4606 /* For certain code models, the code is near as well. */
4608 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4610 /* We also may accept the offsetted memory references in certain special
4613 if (GET_CODE (XEXP (value, 0)) == PLUS)
4615 rtx op1 = XEXP (XEXP (value, 0), 0);
4616 rtx op2 = XEXP (XEXP (value, 0), 1);
4618 if (ix86_cmodel == CM_LARGE)
4620 switch (GET_CODE (op1))
4624 /* For small code model we may accept pretty large positive
4625 offsets, since one bit is available for free. Negative
4626 offsets are limited by the size of NULL pointer area
4627 specified by the ABI. */
4628 if (ix86_cmodel == CM_SMALL
4629 && GET_CODE (op2) == CONST_INT
4630 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4631 && (trunc_int_for_mode (INTVAL (op2), SImode)
4634 /* ??? For the kernel, we may accept adjustment of
4635 -0x10000000, since we know that it will just convert
4636 negative address space to positive, but perhaps this
4637 is not worthwhile. */
4640 /* These conditions are similar to SYMBOL_REF ones, just the
4641 constraints for code models differ. */
4642 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4643 && GET_CODE (op2) == CONST_INT
4644 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4645 && (trunc_int_for_mode (INTVAL (op2), SImode)
4659 /* Value should be nonzero if functions must have frame pointers.
4660 Zero means the frame pointer need not be set up (and parms may
4661 be accessed via the stack pointer) in functions that seem suitable. */
4664 ix86_frame_pointer_required ()
4666 /* If we accessed previous frames, then the generated code expects
4667 to be able to access the saved ebp value in our frame. */
4668 if (cfun->machine->accesses_prev_frame)
4671 /* Several x86 os'es need a frame pointer for other reasons,
4672 usually pertaining to setjmp. */
4673 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4676 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4677 the frame pointer by default. Turn it back on now if we've not
4678 got a leaf function. */
4679 if (TARGET_OMIT_LEAF_FRAME_POINTER
4680 && (!current_function_is_leaf))
4683 if (current_function_profile)
4689 /* Record that the current function accesses previous call frames. */
4692 ix86_setup_frame_addresses ()
4694 cfun->machine->accesses_prev_frame = 1;
4697 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4698 # define USE_HIDDEN_LINKONCE 1
4700 # define USE_HIDDEN_LINKONCE 0
4703 static int pic_labels_used;
4705 /* Fills in the label name that should be used for a pc thunk for
4706 the given register. */
4709 get_pc_thunk_name (name, regno)
4713 if (USE_HIDDEN_LINKONCE)
4714 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4716 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4720 /* This function generates code for -fpic that loads %ebx with
4721 the return address of the caller and then returns. */
4729 for (regno = 0; regno < 8; ++regno)
4733 if (! ((pic_labels_used >> regno) & 1))
4736 get_pc_thunk_name (name, regno);
4738 if (USE_HIDDEN_LINKONCE)
4742 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4744 TREE_PUBLIC (decl) = 1;
4745 TREE_STATIC (decl) = 1;
4746 DECL_ONE_ONLY (decl) = 1;
4748 (*targetm.asm_out.unique_section) (decl, 0);
4749 named_section (decl, NULL, 0);
4751 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4752 fputs ("\t.hidden\t", asm_out_file);
4753 assemble_name (asm_out_file, name);
4754 fputc ('\n', asm_out_file);
4755 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4760 ASM_OUTPUT_LABEL (asm_out_file, name);
4763 xops[0] = gen_rtx_REG (SImode, regno);
4764 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4765 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4766 output_asm_insn ("ret", xops);
4769 if (NEED_INDICATE_EXEC_STACK)
4770 file_end_indicate_exec_stack ();
4773 /* Emit code for the SET_GOT patterns. */
4776 output_set_got (dest)
4782 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4784 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4786 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4789 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4791 output_asm_insn ("call\t%a2", xops);
4794 /* Output the "canonical" label name ("Lxx$pb") here too. This
4795 is what will be referred to by the Mach-O PIC subsystem. */
4796 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4798 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4799 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4802 output_asm_insn ("pop{l}\t%0", xops);
4807 get_pc_thunk_name (name, REGNO (dest));
4808 pic_labels_used |= 1 << REGNO (dest);
4810 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4811 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4812 output_asm_insn ("call\t%X2", xops);
4815 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4816 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4817 else if (!TARGET_MACHO)
4818 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4823 /* Generate an "push" pattern for input ARG. */
4829 return gen_rtx_SET (VOIDmode,
4831 gen_rtx_PRE_DEC (Pmode,
4832 stack_pointer_rtx)),
4836 /* Return >= 0 if there is an unused call-clobbered register available
4837 for the entire function. */
4840 ix86_select_alt_pic_regnum ()
4842 if (current_function_is_leaf && !current_function_profile)
4845 for (i = 2; i >= 0; --i)
4846 if (!regs_ever_live[i])
4850 return INVALID_REGNUM;
4853 /* Return 1 if we need to save REGNO. */
4855 ix86_save_reg (regno, maybe_eh_return)
4857 int maybe_eh_return;
4859 if (pic_offset_table_rtx
4860 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4861 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4862 || current_function_profile
4863 || current_function_calls_eh_return
4864 || current_function_uses_const_pool))
4866 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4871 if (current_function_calls_eh_return && maybe_eh_return)
4876 unsigned test = EH_RETURN_DATA_REGNO (i);
4877 if (test == INVALID_REGNUM)
4884 return (regs_ever_live[regno]
4885 && !call_used_regs[regno]
4886 && !fixed_regs[regno]
4887 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4890 /* Return number of registers to be saved on the stack. */
4898 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4899 if (ix86_save_reg (regno, true))
4904 /* Return the offset between two registers, one to be eliminated, and the other
4905 its replacement, at the start of a routine. */
4908 ix86_initial_elimination_offset (from, to)
4912 struct ix86_frame frame;
4913 ix86_compute_frame_layout (&frame);
4915 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4916 return frame.hard_frame_pointer_offset;
4917 else if (from == FRAME_POINTER_REGNUM
4918 && to == HARD_FRAME_POINTER_REGNUM)
4919 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4922 if (to != STACK_POINTER_REGNUM)
4924 else if (from == ARG_POINTER_REGNUM)
4925 return frame.stack_pointer_offset;
4926 else if (from != FRAME_POINTER_REGNUM)
4929 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4933 /* Fill structure ix86_frame about frame of currently computed function. */
4936 ix86_compute_frame_layout (frame)
4937 struct ix86_frame *frame;
4939 HOST_WIDE_INT total_size;
4940 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4942 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4943 HOST_WIDE_INT size = get_frame_size ();
4945 frame->nregs = ix86_nsaved_regs ();
4948 /* During reload iteration the amount of registers saved can change.
4949 Recompute the value as needed. Do not recompute when amount of registers
4950 didn't change as reload does mutiple calls to the function and does not
4951 expect the decision to change within single iteration. */
4953 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4955 int count = frame->nregs;
4957 cfun->machine->use_fast_prologue_epilogue_nregs = count;
4958 /* The fast prologue uses move instead of push to save registers. This
4959 is significantly longer, but also executes faster as modern hardware
4960 can execute the moves in parallel, but can't do that for push/pop.
4962 Be careful about choosing what prologue to emit: When function takes
4963 many instructions to execute we may use slow version as well as in
4964 case function is known to be outside hot spot (this is known with
4965 feedback only). Weight the size of function by number of registers
4966 to save as it is cheap to use one or two push instructions but very
4967 slow to use many of them. */
4969 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4970 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4971 || (flag_branch_probabilities
4972 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4973 cfun->machine->use_fast_prologue_epilogue = false;
4975 cfun->machine->use_fast_prologue_epilogue
4976 = !expensive_function_p (count);
4978 if (TARGET_PROLOGUE_USING_MOVE
4979 && cfun->machine->use_fast_prologue_epilogue)
4980 frame->save_regs_using_mov = true;
4982 frame->save_regs_using_mov = false;
4985 /* Skip return address and saved base pointer. */
4986 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4988 frame->hard_frame_pointer_offset = offset;
4990 /* Do some sanity checking of stack_alignment_needed and
4991 preferred_alignment, since i386 port is the only using those features
4992 that may break easily. */
4994 if (size && !stack_alignment_needed)
4996 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4998 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5000 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5003 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5004 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5006 /* Register save area */
5007 offset += frame->nregs * UNITS_PER_WORD;
5010 if (ix86_save_varrargs_registers)
5012 offset += X86_64_VARARGS_SIZE;
5013 frame->va_arg_size = X86_64_VARARGS_SIZE;
5016 frame->va_arg_size = 0;
5018 /* Align start of frame for local function. */
5019 frame->padding1 = ((offset + stack_alignment_needed - 1)
5020 & -stack_alignment_needed) - offset;
5022 offset += frame->padding1;
5024 /* Frame pointer points here. */
5025 frame->frame_pointer_offset = offset;
5029 /* Add outgoing arguments area. Can be skipped if we eliminated
5030 all the function calls as dead code. */
5031 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
5033 offset += current_function_outgoing_args_size;
5034 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5037 frame->outgoing_arguments_size = 0;
5039 /* Align stack boundary. Only needed if we're calling another function
5041 if (!current_function_is_leaf || current_function_calls_alloca)
5042 frame->padding2 = ((offset + preferred_alignment - 1)
5043 & -preferred_alignment) - offset;
5045 frame->padding2 = 0;
5047 offset += frame->padding2;
5049 /* We've reached end of stack frame. */
5050 frame->stack_pointer_offset = offset;
5052 /* Size prologue needs to allocate. */
5053 frame->to_allocate =
5054 (size + frame->padding1 + frame->padding2
5055 + frame->outgoing_arguments_size + frame->va_arg_size);
5057 if (!frame->to_allocate && frame->nregs <= 1)
5058 frame->save_regs_using_mov = false;
5060 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5061 && current_function_is_leaf)
5063 frame->red_zone_size = frame->to_allocate;
5064 if (frame->save_regs_using_mov)
5065 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5066 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5067 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5070 frame->red_zone_size = 0;
5071 frame->to_allocate -= frame->red_zone_size;
5072 frame->stack_pointer_offset -= frame->red_zone_size;
5074 fprintf (stderr, "nregs: %i\n", frame->nregs);
5075 fprintf (stderr, "size: %i\n", size);
5076 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5077 fprintf (stderr, "padding1: %i\n", frame->padding1);
5078 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5079 fprintf (stderr, "padding2: %i\n", frame->padding2);
5080 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5081 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5082 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5083 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5084 frame->hard_frame_pointer_offset);
5085 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5089 /* Emit code to save registers in the prologue. */
5092 ix86_emit_save_regs ()
5097 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5098 if (ix86_save_reg (regno, true))
5100 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5101 RTX_FRAME_RELATED_P (insn) = 1;
5105 /* Emit code to save registers using MOV insns. First register
5106 is restored from POINTER + OFFSET. */
5108 ix86_emit_save_regs_using_mov (pointer, offset)
5110 HOST_WIDE_INT offset;
5115 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5116 if (ix86_save_reg (regno, true))
5118 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5120 gen_rtx_REG (Pmode, regno));
5121 RTX_FRAME_RELATED_P (insn) = 1;
5122 offset += UNITS_PER_WORD;
5126 /* Expand the prologue into a bunch of separate insns. */
5129 ix86_expand_prologue ()
5133 struct ix86_frame frame;
5134 HOST_WIDE_INT allocate;
5136 ix86_compute_frame_layout (&frame);
5138 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5139 slower on all targets. Also sdb doesn't like it. */
5141 if (frame_pointer_needed)
5143 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5144 RTX_FRAME_RELATED_P (insn) = 1;
5146 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5147 RTX_FRAME_RELATED_P (insn) = 1;
5150 allocate = frame.to_allocate;
5152 if (!frame.save_regs_using_mov)
5153 ix86_emit_save_regs ();
5155 allocate += frame.nregs * UNITS_PER_WORD;
5157 /* When using red zone we may start register saving before allocating
5158 the stack frame saving one cycle of the prologue. */
5159 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5160 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5161 : stack_pointer_rtx,
5162 -frame.nregs * UNITS_PER_WORD);
5166 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5168 insn = emit_insn (gen_pro_epilogue_adjust_stack
5169 (stack_pointer_rtx, stack_pointer_rtx,
5170 GEN_INT (-allocate)));
5171 RTX_FRAME_RELATED_P (insn) = 1;
5175 /* ??? Is this only valid for Win32? */
5182 arg0 = gen_rtx_REG (SImode, 0);
5183 emit_move_insn (arg0, GEN_INT (allocate));
5185 sym = gen_rtx_MEM (FUNCTION_MODE,
5186 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
5187 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
5189 CALL_INSN_FUNCTION_USAGE (insn)
5190 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
5191 CALL_INSN_FUNCTION_USAGE (insn));
5193 /* Don't allow scheduling pass to move insns across __alloca
5195 emit_insn (gen_blockage (const0_rtx));
5197 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5199 if (!frame_pointer_needed || !frame.to_allocate)
5200 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5202 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5203 -frame.nregs * UNITS_PER_WORD);
5206 pic_reg_used = false;
5207 if (pic_offset_table_rtx
5208 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5209 || current_function_profile))
5211 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5213 if (alt_pic_reg_used != INVALID_REGNUM)
5214 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5216 pic_reg_used = true;
5221 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5223 /* Even with accurate pre-reload life analysis, we can wind up
5224 deleting all references to the pic register after reload.
5225 Consider if cross-jumping unifies two sides of a branch
5226 controlled by a comparison vs the only read from a global.
5227 In which case, allow the set_got to be deleted, though we're
5228 too late to do anything about the ebx save in the prologue. */
5229 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5232 /* Prevent function calls from be scheduled before the call to mcount.
5233 In the pic_reg_used case, make sure that the got load isn't deleted. */
5234 if (current_function_profile)
5235 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5238 /* Emit code to restore saved registers using MOV insns. First register
5239 is restored from POINTER + OFFSET. */
5241 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
5244 int maybe_eh_return;
5248 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5249 if (ix86_save_reg (regno, maybe_eh_return))
5251 emit_move_insn (gen_rtx_REG (Pmode, regno),
5252 adjust_address (gen_rtx_MEM (Pmode, pointer),
5254 offset += UNITS_PER_WORD;
5258 /* Restore function stack, frame, and registers. */
5261 ix86_expand_epilogue (style)
5265 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5266 struct ix86_frame frame;
5267 HOST_WIDE_INT offset;
5269 ix86_compute_frame_layout (&frame);
5271 /* Calculate start of saved registers relative to ebp. Special care
5272 must be taken for the normal return case of a function using
5273 eh_return: the eax and edx registers are marked as saved, but not
5274 restored along this path. */
5275 offset = frame.nregs;
5276 if (current_function_calls_eh_return && style != 2)
5278 offset *= -UNITS_PER_WORD;
5280 /* If we're only restoring one register and sp is not valid then
5281 using a move instruction to restore the register since it's
5282 less work than reloading sp and popping the register.
5284 The default code result in stack adjustment using add/lea instruction,
5285 while this code results in LEAVE instruction (or discrete equivalent),
5286 so it is profitable in some other cases as well. Especially when there
5287 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5288 and there is exactly one register to pop. This heuristic may need some
5289 tuning in future. */
5290 if ((!sp_valid && frame.nregs <= 1)
5291 || (TARGET_EPILOGUE_USING_MOVE
5292 && cfun->machine->use_fast_prologue_epilogue
5293 && (frame.nregs > 1 || frame.to_allocate))
5294 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5295 || (frame_pointer_needed && TARGET_USE_LEAVE
5296 && cfun->machine->use_fast_prologue_epilogue
5297 && frame.nregs == 1)
5298 || current_function_calls_eh_return)
5300 /* Restore registers. We can use ebp or esp to address the memory
5301 locations. If both are available, default to ebp, since offsets
5302 are known to be small. Only exception is esp pointing directly to the
5303 end of block of saved registers, where we may simplify addressing
5306 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5307 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5308 frame.to_allocate, style == 2);
5310 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5311 offset, style == 2);
5313 /* eh_return epilogues need %ecx added to the stack pointer. */
5316 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5318 if (frame_pointer_needed)
5320 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5321 tmp = plus_constant (tmp, UNITS_PER_WORD);
5322 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5324 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5325 emit_move_insn (hard_frame_pointer_rtx, tmp);
5327 emit_insn (gen_pro_epilogue_adjust_stack
5328 (stack_pointer_rtx, sa, const0_rtx));
5332 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5333 tmp = plus_constant (tmp, (frame.to_allocate
5334 + frame.nregs * UNITS_PER_WORD));
5335 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5338 else if (!frame_pointer_needed)
5339 emit_insn (gen_pro_epilogue_adjust_stack
5340 (stack_pointer_rtx, stack_pointer_rtx,
5341 GEN_INT (frame.to_allocate
5342 + frame.nregs * UNITS_PER_WORD)));
5343 /* If not an i386, mov & pop is faster than "leave". */
5344 else if (TARGET_USE_LEAVE || optimize_size
5345 || !cfun->machine->use_fast_prologue_epilogue)
5346 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5349 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
5350 hard_frame_pointer_rtx,
5353 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5355 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5360 /* First step is to deallocate the stack frame so that we can
5361 pop the registers. */
5364 if (!frame_pointer_needed)
5366 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
5367 hard_frame_pointer_rtx,
5370 else if (frame.to_allocate)
5371 emit_insn (gen_pro_epilogue_adjust_stack
5372 (stack_pointer_rtx, stack_pointer_rtx,
5373 GEN_INT (frame.to_allocate)));
5375 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5376 if (ix86_save_reg (regno, false))
5379 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5381 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5383 if (frame_pointer_needed)
5385 /* Leave results in shorter dependency chains on CPUs that are
5386 able to grok it fast. */
5387 if (TARGET_USE_LEAVE)
5388 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5389 else if (TARGET_64BIT)
5390 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5392 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5396 /* Sibcall epilogues don't want a return instruction. */
5400 if (current_function_pops_args && current_function_args_size)
5402 rtx popc = GEN_INT (current_function_pops_args);
5404 /* i386 can only pop 64K bytes. If asked to pop more, pop
5405 return address, do explicit add, and jump indirectly to the
5408 if (current_function_pops_args >= 65536)
5410 rtx ecx = gen_rtx_REG (SImode, 2);
5412 /* There are is no "pascal" calling convention in 64bit ABI. */
5416 emit_insn (gen_popsi1 (ecx));
5417 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5418 emit_jump_insn (gen_return_indirect_internal (ecx));
5421 emit_jump_insn (gen_return_pop_internal (popc));
5424 emit_jump_insn (gen_return_internal ());
5427 /* Reset from the function's potential modifications. */
5430 ix86_output_function_epilogue (file, size)
5431 FILE *file ATTRIBUTE_UNUSED;
5432 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
5434 if (pic_offset_table_rtx)
5435 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5438 /* Extract the parts of an RTL expression that is a valid memory address
5439 for an instruction. Return 0 if the structure of the address is
5440 grossly off. Return -1 if the address contains ASHIFT, so it is not
5441 strictly valid, but still used for computing length of lea instruction. */
5444 ix86_decompose_address (addr, out)
5446 struct ix86_address *out;
5448 rtx base = NULL_RTX;
5449 rtx index = NULL_RTX;
5450 rtx disp = NULL_RTX;
5451 HOST_WIDE_INT scale = 1;
5452 rtx scale_rtx = NULL_RTX;
5454 enum ix86_address_seg seg = SEG_DEFAULT;
5456 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
5458 else if (GET_CODE (addr) == PLUS)
5468 addends[n++] = XEXP (op, 1);
5471 while (GET_CODE (op) == PLUS);
5476 for (i = n; i >= 0; --i)
5479 switch (GET_CODE (op))
5484 index = XEXP (op, 0);
5485 scale_rtx = XEXP (op, 1);
5489 if (XINT (op, 1) == UNSPEC_TP
5490 && TARGET_TLS_DIRECT_SEG_REFS
5491 && seg == SEG_DEFAULT)
5492 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5521 else if (GET_CODE (addr) == MULT)
5523 index = XEXP (addr, 0); /* index*scale */
5524 scale_rtx = XEXP (addr, 1);
5526 else if (GET_CODE (addr) == ASHIFT)
5530 /* We're called for lea too, which implements ashift on occasion. */
5531 index = XEXP (addr, 0);
5532 tmp = XEXP (addr, 1);
5533 if (GET_CODE (tmp) != CONST_INT)
5535 scale = INTVAL (tmp);
5536 if ((unsigned HOST_WIDE_INT) scale > 3)
5542 disp = addr; /* displacement */
5544 /* Extract the integral value of scale. */
5547 if (GET_CODE (scale_rtx) != CONST_INT)
5549 scale = INTVAL (scale_rtx);
5552 /* Allow arg pointer and stack pointer as index if there is not scaling. */
5553 if (base && index && scale == 1
5554 && (index == arg_pointer_rtx
5555 || index == frame_pointer_rtx
5556 || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
5563 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5564 if ((base == hard_frame_pointer_rtx
5565 || base == frame_pointer_rtx
5566 || base == arg_pointer_rtx) && !disp)
5569 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5570 Avoid this by transforming to [%esi+0]. */
5571 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5572 && base && !index && !disp
5574 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5577 /* Special case: encode reg+reg instead of reg*2. */
5578 if (!base && index && scale && scale == 2)
5579 base = index, scale = 1;
5581 /* Special case: scaling cannot be encoded without base or displacement. */
5582 if (!base && !disp && index && scale != 1)
5594 /* Return cost of the memory address x.
5595 For i386, it is better to use a complex address than let gcc copy
5596 the address into a reg and make a new pseudo. But not if the address
5597 requires to two regs - that would mean more pseudos with longer
5600 ix86_address_cost (x)
5603 struct ix86_address parts;
5606 if (!ix86_decompose_address (x, &parts))
5609 if (parts.base && GET_CODE (parts.base) == SUBREG)
5610 parts.base = SUBREG_REG (parts.base);
5611 if (parts.index && GET_CODE (parts.index) == SUBREG)
5612 parts.index = SUBREG_REG (parts.index);
5614 /* More complex memory references are better. */
5615 if (parts.disp && parts.disp != const0_rtx)
5617 if (parts.seg != SEG_DEFAULT)
5620 /* Attempt to minimize number of registers in the address. */
5622 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5624 && (!REG_P (parts.index)
5625 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5629 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5631 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5632 && parts.base != parts.index)
5635 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
5636 since it's predecode logic can't detect the length of instructions
5637 and it degenerates to vector decoded. Increase cost of such
5638 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5639 to split such addresses or even refuse such addresses at all.
5641 Following addressing modes are affected:
5646 The first and last case may be avoidable by explicitly coding the zero in
5647 memory address, but I don't have AMD-K6 machine handy to check this
5651 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5652 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5653 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5659 /* If X is a machine specific address (i.e. a symbol or label being
5660 referenced as a displacement from the GOT implemented using an
5661 UNSPEC), then return the base term. Otherwise return X. */
5664 ix86_find_base_term (x)
5671 if (GET_CODE (x) != CONST)
5674 if (GET_CODE (term) == PLUS
5675 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5676 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5677 term = XEXP (term, 0);
5678 if (GET_CODE (term) != UNSPEC
5679 || XINT (term, 1) != UNSPEC_GOTPCREL)
5682 term = XVECEXP (term, 0, 0);
5684 if (GET_CODE (term) != SYMBOL_REF
5685 && GET_CODE (term) != LABEL_REF)
5691 term = ix86_delegitimize_address (x);
5693 if (GET_CODE (term) != SYMBOL_REF
5694 && GET_CODE (term) != LABEL_REF)
5700 /* Determine if a given RTX is a valid constant. We already know this
5701 satisfies CONSTANT_P. */
5704 legitimate_constant_p (x)
5709 switch (GET_CODE (x))
5712 /* TLS symbols are not constant. */
5713 if (tls_symbolic_operand (x, Pmode))
5718 inner = XEXP (x, 0);
5720 /* Offsets of TLS symbols are never valid.
5721 Discourage CSE from creating them. */
5722 if (GET_CODE (inner) == PLUS
5723 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5726 if (GET_CODE (inner) == PLUS)
5728 if (GET_CODE (XEXP (inner, 1)) != CONST_INT)
5730 inner = XEXP (inner, 0);
5733 /* Only some unspecs are valid as "constants". */
5734 if (GET_CODE (inner) == UNSPEC)
5735 switch (XINT (inner, 1))
5739 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5741 return local_dynamic_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5751 /* Otherwise we handle everything else in the move patterns. */
5755 /* Determine if it's legal to put X into the constant pool. This
5756 is not possible for the address of thread-local symbols, which
5757 is checked above. */
5760 ix86_cannot_force_const_mem (x)
5763 return !legitimate_constant_p (x);
5766 /* Determine if a given RTX is a valid constant address. */
5769 constant_address_p (x)
5772 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5775 /* Nonzero if the constant value X is a legitimate general operand
5776 when generating PIC code. It is given that flag_pic is on and
5777 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5780 legitimate_pic_operand_p (x)
5785 switch (GET_CODE (x))
5788 inner = XEXP (x, 0);
5790 /* Only some unspecs are valid as "constants". */
5791 if (GET_CODE (inner) == UNSPEC)
5792 switch (XINT (inner, 1))
5795 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5803 return legitimate_pic_address_disp_p (x);
5810 /* Determine if a given CONST RTX is a valid memory displacement
5814 legitimate_pic_address_disp_p (disp)
5819 /* In 64bit mode we can allow direct addresses of symbols and labels
5820 when they are not dynamic symbols. */
5823 /* TLS references should always be enclosed in UNSPEC. */
5824 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5826 if (GET_CODE (disp) == SYMBOL_REF
5827 && ix86_cmodel == CM_SMALL_PIC
5828 && SYMBOL_REF_LOCAL_P (disp))
5830 if (GET_CODE (disp) == LABEL_REF)
5832 if (GET_CODE (disp) == CONST
5833 && GET_CODE (XEXP (disp, 0)) == PLUS
5834 && ((GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
5835 && ix86_cmodel == CM_SMALL_PIC
5836 && SYMBOL_REF_LOCAL_P (XEXP (XEXP (disp, 0), 0)))
5837 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
5838 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT
5839 && INTVAL (XEXP (XEXP (disp, 0), 1)) < 16*1024*1024
5840 && INTVAL (XEXP (XEXP (disp, 0), 1)) >= -16*1024*1024)
5843 if (GET_CODE (disp) != CONST)
5845 disp = XEXP (disp, 0);
5849 /* We are unsafe to allow PLUS expressions. This limit allowed distance
5850 of GOT tables. We should not need these anyway. */
5851 if (GET_CODE (disp) != UNSPEC
5852 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5855 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5856 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5862 if (GET_CODE (disp) == PLUS)
5864 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5866 disp = XEXP (disp, 0);
5870 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5871 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5873 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5874 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5875 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5877 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5878 if (strstr (sym_name, "$pb") != 0)
5883 if (GET_CODE (disp) != UNSPEC)
5886 switch (XINT (disp, 1))
5891 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5893 if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
5894 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
5895 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5897 case UNSPEC_GOTTPOFF:
5898 case UNSPEC_GOTNTPOFF:
5899 case UNSPEC_INDNTPOFF:
5902 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5904 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5906 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5912 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5913 memory address for an instruction. The MODE argument is the machine mode
5914 for the MEM expression that wants to use this address.
5916 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
5917 convert common non-canonical forms to canonical form so that they will
5921 legitimate_address_p (mode, addr, strict)
5922 enum machine_mode mode;
5926 struct ix86_address parts;
5927 rtx base, index, disp;
5928 HOST_WIDE_INT scale;
5929 const char *reason = NULL;
5930 rtx reason_rtx = NULL_RTX;
5932 if (TARGET_DEBUG_ADDR)
5935 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5936 GET_MODE_NAME (mode), strict);
5940 if (ix86_decompose_address (addr, &parts) <= 0)
5942 reason = "decomposition failed";
5947 index = parts.index;
5949 scale = parts.scale;
5951 /* Validate base register.
5953 Don't allow SUBREG's here, it can lead to spill failures when the base
5954 is one word out of a two word structure, which is represented internally
5962 if (GET_CODE (base) == SUBREG)
5963 reg = SUBREG_REG (base);
5967 if (GET_CODE (reg) != REG)
5969 reason = "base is not a register";
5973 if (GET_MODE (base) != Pmode)
5975 reason = "base is not in Pmode";
5979 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5980 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5982 reason = "base is not valid";
5987 /* Validate index register.
5989 Don't allow SUBREG's here, it can lead to spill failures when the index
5990 is one word out of a two word structure, which is represented internally
5998 if (GET_CODE (index) == SUBREG)
5999 reg = SUBREG_REG (index);
6003 if (GET_CODE (reg) != REG)
6005 reason = "index is not a register";
6009 if (GET_MODE (index) != Pmode)
6011 reason = "index is not in Pmode";
6015 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6016 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6018 reason = "index is not valid";
6023 /* Validate scale factor. */
6026 reason_rtx = GEN_INT (scale);
6029 reason = "scale without index";
6033 if (scale != 2 && scale != 4 && scale != 8)
6035 reason = "scale is not a valid multiplier";
6040 /* Validate displacement. */
6045 if (GET_CODE (disp) == CONST
6046 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6047 switch (XINT (XEXP (disp, 0), 1))
6051 case UNSPEC_GOTPCREL:
6054 goto is_legitimate_pic;
6056 case UNSPEC_GOTTPOFF:
6057 case UNSPEC_GOTNTPOFF:
6058 case UNSPEC_INDNTPOFF:
6064 reason = "invalid address unspec";
6068 else if (flag_pic && (SYMBOLIC_CONST (disp)
6070 && !machopic_operand_p (disp)
6075 if (TARGET_64BIT && (index || base))
6077 /* foo@dtpoff(%rX) is ok. */
6078 if (GET_CODE (disp) != CONST
6079 || GET_CODE (XEXP (disp, 0)) != PLUS
6080 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6081 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6082 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6083 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6085 reason = "non-constant pic memory reference";
6089 else if (! legitimate_pic_address_disp_p (disp))
6091 reason = "displacement is an invalid pic construct";
6095 /* This code used to verify that a symbolic pic displacement
6096 includes the pic_offset_table_rtx register.
6098 While this is good idea, unfortunately these constructs may
6099 be created by "adds using lea" optimization for incorrect
6108 This code is nonsensical, but results in addressing
6109 GOT table with pic_offset_table_rtx base. We can't
6110 just refuse it easily, since it gets matched by
6111 "addsi3" pattern, that later gets split to lea in the
6112 case output register differs from input. While this
6113 can be handled by separate addsi pattern for this case
6114 that never results in lea, this seems to be easier and
6115 correct fix for crash to disable this test. */
6117 else if (GET_CODE (disp) != LABEL_REF
6118 && GET_CODE (disp) != CONST_INT
6119 && (GET_CODE (disp) != CONST
6120 || !legitimate_constant_p (disp))
6121 && (GET_CODE (disp) != SYMBOL_REF
6122 || !legitimate_constant_p (disp)))
6124 reason = "displacement is not constant";
6127 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
6129 reason = "displacement is out of range";
6134 /* Everything looks valid. */
6135 if (TARGET_DEBUG_ADDR)
6136 fprintf (stderr, "Success.\n");
6140 if (TARGET_DEBUG_ADDR)
6142 fprintf (stderr, "Error: %s\n", reason);
6143 debug_rtx (reason_rtx);
6148 /* Return an unique alias set for the GOT. */
6150 static HOST_WIDE_INT
6151 ix86_GOT_alias_set ()
6153 static HOST_WIDE_INT set = -1;
6155 set = new_alias_set ();
6159 /* Return a legitimate reference for ORIG (an address) using the
6160 register REG. If REG is 0, a new pseudo is generated.
6162 There are two types of references that must be handled:
6164 1. Global data references must load the address from the GOT, via
6165 the PIC reg. An insn is emitted to do this load, and the reg is
6168 2. Static data references, constant pool addresses, and code labels
6169 compute the address as an offset from the GOT, whose base is in
6170 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6171 differentiate them from global data objects. The returned
6172 address is the PIC reg + an unspec constant.
6174 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6175 reg also appears in the address. */
6178 legitimize_pic_address (orig, reg)
6188 reg = gen_reg_rtx (Pmode);
6189 /* Use the generic Mach-O PIC machinery. */
6190 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6193 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6195 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6197 /* This symbol may be referenced via a displacement from the PIC
6198 base address (@GOTOFF). */
6200 if (reload_in_progress)
6201 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6202 if (GET_CODE (addr) == CONST)
6203 addr = XEXP (addr, 0);
6204 if (GET_CODE (addr) == PLUS)
6206 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6207 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6210 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6211 new = gen_rtx_CONST (Pmode, new);
6212 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6216 emit_move_insn (reg, new);
6220 else if (GET_CODE (addr) == SYMBOL_REF)
6224 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6225 new = gen_rtx_CONST (Pmode, new);
6226 new = gen_rtx_MEM (Pmode, new);
6227 RTX_UNCHANGING_P (new) = 1;
6228 set_mem_alias_set (new, ix86_GOT_alias_set ());
6231 reg = gen_reg_rtx (Pmode);
6232 /* Use directly gen_movsi, otherwise the address is loaded
6233 into register for CSE. We don't want to CSE this addresses,
6234 instead we CSE addresses from the GOT table, so skip this. */
6235 emit_insn (gen_movsi (reg, new));
6240 /* This symbol must be referenced via a load from the
6241 Global Offset Table (@GOT). */
6243 if (reload_in_progress)
6244 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6245 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6246 new = gen_rtx_CONST (Pmode, new);
6247 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6248 new = gen_rtx_MEM (Pmode, new);
6249 RTX_UNCHANGING_P (new) = 1;
6250 set_mem_alias_set (new, ix86_GOT_alias_set ());
6253 reg = gen_reg_rtx (Pmode);
6254 emit_move_insn (reg, new);
6260 if (GET_CODE (addr) == CONST)
6262 addr = XEXP (addr, 0);
6264 /* We must match stuff we generate before. Assume the only
6265 unspecs that can get here are ours. Not that we could do
6266 anything with them anyway... */
6267 if (GET_CODE (addr) == UNSPEC
6268 || (GET_CODE (addr) == PLUS
6269 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6271 if (GET_CODE (addr) != PLUS)
6274 if (GET_CODE (addr) == PLUS)
6276 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6278 /* Check first to see if this is a constant offset from a @GOTOFF
6279 symbol reference. */
6280 if (local_symbolic_operand (op0, Pmode)
6281 && GET_CODE (op1) == CONST_INT)
6285 if (reload_in_progress)
6286 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6287 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6289 new = gen_rtx_PLUS (Pmode, new, op1);
6290 new = gen_rtx_CONST (Pmode, new);
6291 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6295 emit_move_insn (reg, new);
6301 if (INTVAL (op1) < -16*1024*1024
6302 || INTVAL (op1) >= 16*1024*1024)
6303 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
6308 base = legitimize_pic_address (XEXP (addr, 0), reg);
6309 new = legitimize_pic_address (XEXP (addr, 1),
6310 base == reg ? NULL_RTX : reg);
6312 if (GET_CODE (new) == CONST_INT)
6313 new = plus_constant (base, INTVAL (new));
6316 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6318 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6319 new = XEXP (new, 1);
6321 new = gen_rtx_PLUS (Pmode, base, new);
6329 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6332 get_thread_pointer (to_reg)
6337 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6341 reg = gen_reg_rtx (Pmode);
6342 insn = gen_rtx_SET (VOIDmode, reg, tp);
6343 insn = emit_insn (insn);
6348 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6349 false if we expect this to be used for a memory address and true if
6350 we expect to load the address into a register. */
6353 legitimize_tls_address (x, model, for_mov)
6355 enum tls_model model;
6358 rtx dest, base, off, pic;
6363 case TLS_MODEL_GLOBAL_DYNAMIC:
6364 dest = gen_reg_rtx (Pmode);
6367 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6370 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6371 insns = get_insns ();
6374 emit_libcall_block (insns, dest, rax, x);
6377 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6380 case TLS_MODEL_LOCAL_DYNAMIC:
6381 base = gen_reg_rtx (Pmode);
6384 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6387 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6388 insns = get_insns ();
6391 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6392 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6393 emit_libcall_block (insns, base, rax, note);
6396 emit_insn (gen_tls_local_dynamic_base_32 (base));
6398 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6399 off = gen_rtx_CONST (Pmode, off);
6401 return gen_rtx_PLUS (Pmode, base, off);
6403 case TLS_MODEL_INITIAL_EXEC:
6407 type = UNSPEC_GOTNTPOFF;
6411 if (reload_in_progress)
6412 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6413 pic = pic_offset_table_rtx;
6414 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6416 else if (!TARGET_GNU_TLS)
6418 pic = gen_reg_rtx (Pmode);
6419 emit_insn (gen_set_got (pic));
6420 type = UNSPEC_GOTTPOFF;
6425 type = UNSPEC_INDNTPOFF;
6428 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6429 off = gen_rtx_CONST (Pmode, off);
6431 off = gen_rtx_PLUS (Pmode, pic, off);
6432 off = gen_rtx_MEM (Pmode, off);
6433 RTX_UNCHANGING_P (off) = 1;
6434 set_mem_alias_set (off, ix86_GOT_alias_set ());
6436 if (TARGET_64BIT || TARGET_GNU_TLS)
6438 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6439 off = force_reg (Pmode, off);
6440 return gen_rtx_PLUS (Pmode, base, off);
6444 base = get_thread_pointer (true);
6445 dest = gen_reg_rtx (Pmode);
6446 emit_insn (gen_subsi3 (dest, base, off));
6450 case TLS_MODEL_LOCAL_EXEC:
6451 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6452 (TARGET_64BIT || TARGET_GNU_TLS)
6453 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6454 off = gen_rtx_CONST (Pmode, off);
6456 if (TARGET_64BIT || TARGET_GNU_TLS)
6458 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6459 return gen_rtx_PLUS (Pmode, base, off);
6463 base = get_thread_pointer (true);
6464 dest = gen_reg_rtx (Pmode);
6465 emit_insn (gen_subsi3 (dest, base, off));
6476 /* Try machine-dependent ways of modifying an illegitimate address
6477 to be legitimate. If we find one, return the new, valid address.
6478 This macro is used in only one place: `memory_address' in explow.c.
6480 OLDX is the address as it was before break_out_memory_refs was called.
6481 In some cases it is useful to look at this to decide what needs to be done.
6483 MODE and WIN are passed so that this macro can use
6484 GO_IF_LEGITIMATE_ADDRESS.
6486 It is always safe for this macro to do nothing. It exists to recognize
6487 opportunities to optimize the output.
6489 For the 80386, we handle X+REG by loading X into a register R and
6490 using R+REG. R will go in a general reg and indexing will be used.
6491 However, if REG is a broken-out memory address or multiplication,
6492 nothing needs to be done because REG can certainly go in a general reg.
6494 When -fpic is used, special handling is needed for symbolic references.
6495 See comments by legitimize_pic_address in i386.c for details. */
6498 legitimize_address (x, oldx, mode)
6500 register rtx oldx ATTRIBUTE_UNUSED;
6501 enum machine_mode mode;
6506 if (TARGET_DEBUG_ADDR)
6508 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6509 GET_MODE_NAME (mode));
6513 log = tls_symbolic_operand (x, mode);
6515 return legitimize_tls_address (x, log, false);
6517 if (flag_pic && SYMBOLIC_CONST (x))
6518 return legitimize_pic_address (x, 0);
6520 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6521 if (GET_CODE (x) == ASHIFT
6522 && GET_CODE (XEXP (x, 1)) == CONST_INT
6523 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6526 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6527 GEN_INT (1 << log));
6530 if (GET_CODE (x) == PLUS)
6532 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6534 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6535 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6536 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6539 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6540 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6541 GEN_INT (1 << log));
6544 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6545 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6546 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6549 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6550 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6551 GEN_INT (1 << log));
6554 /* Put multiply first if it isn't already. */
6555 if (GET_CODE (XEXP (x, 1)) == MULT)
6557 rtx tmp = XEXP (x, 0);
6558 XEXP (x, 0) = XEXP (x, 1);
6563 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6564 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6565 created by virtual register instantiation, register elimination, and
6566 similar optimizations. */
6567 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6570 x = gen_rtx_PLUS (Pmode,
6571 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6572 XEXP (XEXP (x, 1), 0)),
6573 XEXP (XEXP (x, 1), 1));
6577 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6578 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6579 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6580 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6581 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6582 && CONSTANT_P (XEXP (x, 1)))
6585 rtx other = NULL_RTX;
6587 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6589 constant = XEXP (x, 1);
6590 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6592 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6594 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6595 other = XEXP (x, 1);
6603 x = gen_rtx_PLUS (Pmode,
6604 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6605 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6606 plus_constant (other, INTVAL (constant)));
6610 if (changed && legitimate_address_p (mode, x, FALSE))
6613 if (GET_CODE (XEXP (x, 0)) == MULT)
6616 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6619 if (GET_CODE (XEXP (x, 1)) == MULT)
6622 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6626 && GET_CODE (XEXP (x, 1)) == REG
6627 && GET_CODE (XEXP (x, 0)) == REG)
6630 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6633 x = legitimize_pic_address (x, 0);
6636 if (changed && legitimate_address_p (mode, x, FALSE))
6639 if (GET_CODE (XEXP (x, 0)) == REG)
6641 register rtx temp = gen_reg_rtx (Pmode);
6642 register rtx val = force_operand (XEXP (x, 1), temp);
6644 emit_move_insn (temp, val);
6650 else if (GET_CODE (XEXP (x, 1)) == REG)
6652 register rtx temp = gen_reg_rtx (Pmode);
6653 register rtx val = force_operand (XEXP (x, 0), temp);
6655 emit_move_insn (temp, val);
6665 /* Print an integer constant expression in assembler syntax. Addition
6666 and subtraction are the only arithmetic that may appear in these
6667 expressions. FILE is the stdio stream to write to, X is the rtx, and
6668 CODE is the operand print code from the output string. */
6671 output_pic_addr_const (file, x, code)
6678 switch (GET_CODE (x))
6688 assemble_name (file, XSTR (x, 0));
6689 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
6690 fputs ("@PLT", file);
6697 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6698 assemble_name (asm_out_file, buf);
6702 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6706 /* This used to output parentheses around the expression,
6707 but that does not work on the 386 (either ATT or BSD assembler). */
6708 output_pic_addr_const (file, XEXP (x, 0), code);
6712 if (GET_MODE (x) == VOIDmode)
6714 /* We can use %d if the number is <32 bits and positive. */
6715 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6716 fprintf (file, "0x%lx%08lx",
6717 (unsigned long) CONST_DOUBLE_HIGH (x),
6718 (unsigned long) CONST_DOUBLE_LOW (x));
6720 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6723 /* We can't handle floating point constants;
6724 PRINT_OPERAND must handle them. */
6725 output_operand_lossage ("floating constant misused");
6729 /* Some assemblers need integer constants to appear first. */
6730 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6732 output_pic_addr_const (file, XEXP (x, 0), code);
6734 output_pic_addr_const (file, XEXP (x, 1), code);
6736 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6738 output_pic_addr_const (file, XEXP (x, 1), code);
6740 output_pic_addr_const (file, XEXP (x, 0), code);
6748 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6749 output_pic_addr_const (file, XEXP (x, 0), code);
6751 output_pic_addr_const (file, XEXP (x, 1), code);
6753 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6757 if (XVECLEN (x, 0) != 1)
6759 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6760 switch (XINT (x, 1))
6763 fputs ("@GOT", file);
6766 fputs ("@GOTOFF", file);
6768 case UNSPEC_GOTPCREL:
6769 fputs ("@GOTPCREL(%rip)", file);
6771 case UNSPEC_GOTTPOFF:
6772 /* FIXME: This might be @TPOFF in Sun ld too. */
6773 fputs ("@GOTTPOFF", file);
6776 fputs ("@TPOFF", file);
6780 fputs ("@TPOFF", file);
6782 fputs ("@NTPOFF", file);
6785 fputs ("@DTPOFF", file);
6787 case UNSPEC_GOTNTPOFF:
6789 fputs ("@GOTTPOFF(%rip)", file);
6791 fputs ("@GOTNTPOFF", file);
6793 case UNSPEC_INDNTPOFF:
6794 fputs ("@INDNTPOFF", file);
6797 output_operand_lossage ("invalid UNSPEC as operand");
6803 output_operand_lossage ("invalid expression as operand");
6807 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6808 We need to handle our special PIC relocations. */
6811 i386_dwarf_output_addr_const (file, x)
6816 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6820 fprintf (file, "%s", ASM_LONG);
6823 output_pic_addr_const (file, x, '\0');
6825 output_addr_const (file, x);
6829 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6830 We need to emit DTP-relative relocations. */
6833 i386_output_dwarf_dtprel (file, size, x)
6838 fputs (ASM_LONG, file);
6839 output_addr_const (file, x);
6840 fputs ("@DTPOFF", file);
6846 fputs (", 0", file);
6853 /* In the name of slightly smaller debug output, and to cater to
6854 general assembler losage, recognize PIC+GOTOFF and turn it back
6855 into a direct symbol reference. */
6858 ix86_delegitimize_address (orig_x)
6863 if (GET_CODE (x) == MEM)
6868 if (GET_CODE (x) != CONST
6869 || GET_CODE (XEXP (x, 0)) != UNSPEC
6870 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6871 || GET_CODE (orig_x) != MEM)
6873 return XVECEXP (XEXP (x, 0), 0, 0);
6876 if (GET_CODE (x) != PLUS
6877 || GET_CODE (XEXP (x, 1)) != CONST)
6880 if (GET_CODE (XEXP (x, 0)) == REG
6881 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6882 /* %ebx + GOT/GOTOFF */
6884 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6886 /* %ebx + %reg * scale + GOT/GOTOFF */
6888 if (GET_CODE (XEXP (y, 0)) == REG
6889 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6891 else if (GET_CODE (XEXP (y, 1)) == REG
6892 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6896 if (GET_CODE (y) != REG
6897 && GET_CODE (y) != MULT
6898 && GET_CODE (y) != ASHIFT)
6904 x = XEXP (XEXP (x, 1), 0);
6905 if (GET_CODE (x) == UNSPEC
6906 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6907 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6910 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6911 return XVECEXP (x, 0, 0);
6914 if (GET_CODE (x) == PLUS
6915 && GET_CODE (XEXP (x, 0)) == UNSPEC
6916 && GET_CODE (XEXP (x, 1)) == CONST_INT
6917 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6918 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6919 && GET_CODE (orig_x) != MEM)))
6921 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6923 return gen_rtx_PLUS (Pmode, y, x);
6931 put_condition_code (code, mode, reverse, fp, file)
6933 enum machine_mode mode;
6939 if (mode == CCFPmode || mode == CCFPUmode)
6941 enum rtx_code second_code, bypass_code;
6942 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6943 if (bypass_code != NIL || second_code != NIL)
6945 code = ix86_fp_compare_code_to_integer (code);
6949 code = reverse_condition (code);
6960 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6965 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
6966 Those same assemblers have the same but opposite losage on cmov. */
6969 suffix = fp ? "nbe" : "a";
6972 if (mode == CCNOmode || mode == CCGOCmode)
6974 else if (mode == CCmode || mode == CCGCmode)
6985 if (mode == CCNOmode || mode == CCGOCmode)
6987 else if (mode == CCmode || mode == CCGCmode)
6996 suffix = fp ? "nb" : "ae";
6999 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
7009 suffix = fp ? "u" : "p";
7012 suffix = fp ? "nu" : "np";
7017 fputs (suffix, file);
7021 print_reg (x, code, file)
7026 if (REGNO (x) == ARG_POINTER_REGNUM
7027 || REGNO (x) == FRAME_POINTER_REGNUM
7028 || REGNO (x) == FLAGS_REG
7029 || REGNO (x) == FPSR_REG)
7032 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7035 if (code == 'w' || MMX_REG_P (x))
7037 else if (code == 'b')
7039 else if (code == 'k')
7041 else if (code == 'q')
7043 else if (code == 'y')
7045 else if (code == 'h')
7048 code = GET_MODE_SIZE (GET_MODE (x));
7050 /* Irritatingly, AMD extended registers use different naming convention
7051 from the normal registers. */
7052 if (REX_INT_REG_P (x))
7059 error ("extended registers have no high halves");
7062 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7065 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7068 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7071 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7074 error ("unsupported operand size for extended register");
7082 if (STACK_TOP_P (x))
7084 fputs ("st(0)", file);
7091 if (! ANY_FP_REG_P (x))
7092 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7096 fputs (hi_reg_name[REGNO (x)], file);
7099 fputs (qi_reg_name[REGNO (x)], file);
7102 fputs (qi_high_reg_name[REGNO (x)], file);
7109 /* Locate some local-dynamic symbol still in use by this function
7110 so that we can print its name in some tls_local_dynamic_base
7114 get_some_local_dynamic_name ()
7118 if (cfun->machine->some_ld_name)
7119 return cfun->machine->some_ld_name;
7121 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7123 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7124 return cfun->machine->some_ld_name;
7130 get_some_local_dynamic_name_1 (px, data)
7132 void *data ATTRIBUTE_UNUSED;
7136 if (GET_CODE (x) == SYMBOL_REF
7137 && local_dynamic_symbolic_operand (x, Pmode))
7139 cfun->machine->some_ld_name = XSTR (x, 0);
7147 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7148 C -- print opcode suffix for set/cmov insn.
7149 c -- like C, but print reversed condition
7150 F,f -- likewise, but for floating-point.
7151 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
7153 R -- print the prefix for register names.
7154 z -- print the opcode suffix for the size of the current operand.
7155 * -- print a star (in certain assembler syntax)
7156 A -- print an absolute memory reference.
7157 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7158 s -- print a shift double count, followed by the assemblers argument
7160 b -- print the QImode name of the register for the indicated operand.
7161 %b0 would print %al if operands[0] is reg 0.
7162 w -- likewise, print the HImode name of the register.
7163 k -- likewise, print the SImode name of the register.
7164 q -- likewise, print the DImode name of the register.
7165 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7166 y -- print "st(0)" instead of "st" as a register.
7167 D -- print condition for SSE cmp instruction.
7168 P -- if PIC, print an @PLT suffix.
7169 X -- don't print any sort of PIC '@' suffix for a symbol.
7170 & -- print some in-use local-dynamic symbol name.
7174 print_operand (file, x, code)
7184 if (ASSEMBLER_DIALECT == ASM_ATT)
7189 assemble_name (file, get_some_local_dynamic_name ());
7193 if (ASSEMBLER_DIALECT == ASM_ATT)
7195 else if (ASSEMBLER_DIALECT == ASM_INTEL)
7197 /* Intel syntax. For absolute addresses, registers should not
7198 be surrounded by braces. */
7199 if (GET_CODE (x) != REG)
7202 PRINT_OPERAND (file, x, 0);
7210 PRINT_OPERAND (file, x, 0);
7215 if (ASSEMBLER_DIALECT == ASM_ATT)
7220 if (ASSEMBLER_DIALECT == ASM_ATT)
7225 if (ASSEMBLER_DIALECT == ASM_ATT)
7230 if (ASSEMBLER_DIALECT == ASM_ATT)
7235 if (ASSEMBLER_DIALECT == ASM_ATT)
7240 if (ASSEMBLER_DIALECT == ASM_ATT)
7245 /* 387 opcodes don't get size suffixes if the operands are
7247 if (STACK_REG_P (x))
7250 /* Likewise if using Intel opcodes. */
7251 if (ASSEMBLER_DIALECT == ASM_INTEL)
7254 /* This is the size of op from size of operand. */
7255 switch (GET_MODE_SIZE (GET_MODE (x)))
7258 #ifdef HAVE_GAS_FILDS_FISTS
7264 if (GET_MODE (x) == SFmode)
7279 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7281 #ifdef GAS_MNEMONICS
7307 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7309 PRINT_OPERAND (file, x, 0);
7315 /* Little bit of braindamage here. The SSE compare instructions
7316 does use completely different names for the comparisons that the
7317 fp conditional moves. */
7318 switch (GET_CODE (x))
7333 fputs ("unord", file);
7337 fputs ("neq", file);
7341 fputs ("nlt", file);
7345 fputs ("nle", file);
7348 fputs ("ord", file);
7356 #ifdef CMOV_SUN_AS_SYNTAX
7357 if (ASSEMBLER_DIALECT == ASM_ATT)
7359 switch (GET_MODE (x))
7361 case HImode: putc ('w', file); break;
7363 case SFmode: putc ('l', file); break;
7365 case DFmode: putc ('q', file); break;
7373 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7376 #ifdef CMOV_SUN_AS_SYNTAX
7377 if (ASSEMBLER_DIALECT == ASM_ATT)
7380 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7383 /* Like above, but reverse condition */
7385 /* Check to see if argument to %c is really a constant
7386 and not a condition code which needs to be reversed. */
7387 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
7389 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7392 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7395 #ifdef CMOV_SUN_AS_SYNTAX
7396 if (ASSEMBLER_DIALECT == ASM_ATT)
7399 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7405 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7408 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7411 int pred_val = INTVAL (XEXP (x, 0));
7413 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7414 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7416 int taken = pred_val > REG_BR_PROB_BASE / 2;
7417 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7419 /* Emit hints only in the case default branch prediction
7420 heuristics would fail. */
7421 if (taken != cputaken)
7423 /* We use 3e (DS) prefix for taken branches and
7424 2e (CS) prefix for not taken branches. */
7426 fputs ("ds ; ", file);
7428 fputs ("cs ; ", file);
7435 output_operand_lossage ("invalid operand code `%c'", code);
7439 if (GET_CODE (x) == REG)
7441 PRINT_REG (x, code, file);
7444 else if (GET_CODE (x) == MEM)
7446 /* No `byte ptr' prefix for call instructions. */
7447 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7450 switch (GET_MODE_SIZE (GET_MODE (x)))
7452 case 1: size = "BYTE"; break;
7453 case 2: size = "WORD"; break;
7454 case 4: size = "DWORD"; break;
7455 case 8: size = "QWORD"; break;
7456 case 12: size = "XWORD"; break;
7457 case 16: size = "XMMWORD"; break;
7462 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7465 else if (code == 'w')
7467 else if (code == 'k')
7471 fputs (" PTR ", file);
7475 /* Avoid (%rip) for call operands. */
7476 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7477 && GET_CODE (x) != CONST_INT)
7478 output_addr_const (file, x);
7479 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7480 output_operand_lossage ("invalid constraints for operand");
7485 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7490 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7491 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7493 if (ASSEMBLER_DIALECT == ASM_ATT)
7495 fprintf (file, "0x%lx", l);
7498 /* These float cases don't actually occur as immediate operands. */
7499 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7503 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7504 fprintf (file, "%s", dstr);
7507 else if (GET_CODE (x) == CONST_DOUBLE
7508 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
7512 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7513 fprintf (file, "%s", dstr);
7520 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7522 if (ASSEMBLER_DIALECT == ASM_ATT)
7525 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7526 || GET_CODE (x) == LABEL_REF)
7528 if (ASSEMBLER_DIALECT == ASM_ATT)
7531 fputs ("OFFSET FLAT:", file);
7534 if (GET_CODE (x) == CONST_INT)
7535 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7537 output_pic_addr_const (file, x, code);
7539 output_addr_const (file, x);
7543 /* Print a memory operand whose address is ADDR. */
7546 print_operand_address (file, addr)
7550 struct ix86_address parts;
7551 rtx base, index, disp;
7554 if (! ix86_decompose_address (addr, &parts))
7558 index = parts.index;
7560 scale = parts.scale;
7568 if (USER_LABEL_PREFIX[0] == 0)
7570 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
7576 if (!base && !index)
7578 /* Displacement only requires special attention. */
7580 if (GET_CODE (disp) == CONST_INT)
7582 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
7584 if (USER_LABEL_PREFIX[0] == 0)
7586 fputs ("ds:", file);
7588 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
7591 output_pic_addr_const (file, disp, 0);
7593 output_addr_const (file, disp);
7595 /* Use one byte shorter RIP relative addressing for 64bit mode. */
7597 && ((GET_CODE (disp) == SYMBOL_REF
7598 && ! tls_symbolic_operand (disp, GET_MODE (disp)))
7599 || GET_CODE (disp) == LABEL_REF
7600 || (GET_CODE (disp) == CONST
7601 && GET_CODE (XEXP (disp, 0)) == PLUS
7602 && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
7603 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
7604 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
7605 fputs ("(%rip)", file);
7609 if (ASSEMBLER_DIALECT == ASM_ATT)
7614 output_pic_addr_const (file, disp, 0);
7615 else if (GET_CODE (disp) == LABEL_REF)
7616 output_asm_label (disp);
7618 output_addr_const (file, disp);
7623 PRINT_REG (base, 0, file);
7627 PRINT_REG (index, 0, file);
7629 fprintf (file, ",%d", scale);
7635 rtx offset = NULL_RTX;
7639 /* Pull out the offset of a symbol; print any symbol itself. */
7640 if (GET_CODE (disp) == CONST
7641 && GET_CODE (XEXP (disp, 0)) == PLUS
7642 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7644 offset = XEXP (XEXP (disp, 0), 1);
7645 disp = gen_rtx_CONST (VOIDmode,
7646 XEXP (XEXP (disp, 0), 0));
7650 output_pic_addr_const (file, disp, 0);
7651 else if (GET_CODE (disp) == LABEL_REF)
7652 output_asm_label (disp);
7653 else if (GET_CODE (disp) == CONST_INT)
7656 output_addr_const (file, disp);
7662 PRINT_REG (base, 0, file);
7665 if (INTVAL (offset) >= 0)
7667 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7671 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7678 PRINT_REG (index, 0, file);
7680 fprintf (file, "*%d", scale);
7688 output_addr_const_extra (file, x)
7694 if (GET_CODE (x) != UNSPEC)
7697 op = XVECEXP (x, 0, 0);
7698 switch (XINT (x, 1))
7700 case UNSPEC_GOTTPOFF:
7701 output_addr_const (file, op);
7702 /* FIXME: This might be @TPOFF in Sun ld. */
7703 fputs ("@GOTTPOFF", file);
7706 output_addr_const (file, op);
7707 fputs ("@TPOFF", file);
7710 output_addr_const (file, op);
7712 fputs ("@TPOFF", file);
7714 fputs ("@NTPOFF", file);
7717 output_addr_const (file, op);
7718 fputs ("@DTPOFF", file);
7720 case UNSPEC_GOTNTPOFF:
7721 output_addr_const (file, op);
7723 fputs ("@GOTTPOFF(%rip)", file);
7725 fputs ("@GOTNTPOFF", file);
7727 case UNSPEC_INDNTPOFF:
7728 output_addr_const (file, op);
7729 fputs ("@INDNTPOFF", file);
7739 /* Split one or more DImode RTL references into pairs of SImode
7740 references. The RTL can be REG, offsettable MEM, integer constant, or
7741 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7742 split and "num" is its length. lo_half and hi_half are output arrays
7743 that parallel "operands". */
7746 split_di (operands, num, lo_half, hi_half)
7749 rtx lo_half[], hi_half[];
7753 rtx op = operands[num];
7755 /* simplify_subreg refuse to split volatile memory addresses,
7756 but we still have to handle it. */
7757 if (GET_CODE (op) == MEM)
7759 lo_half[num] = adjust_address (op, SImode, 0);
7760 hi_half[num] = adjust_address (op, SImode, 4);
7764 lo_half[num] = simplify_gen_subreg (SImode, op,
7765 GET_MODE (op) == VOIDmode
7766 ? DImode : GET_MODE (op), 0);
7767 hi_half[num] = simplify_gen_subreg (SImode, op,
7768 GET_MODE (op) == VOIDmode
7769 ? DImode : GET_MODE (op), 4);
7773 /* Split one or more TImode RTL references into pairs of SImode
7774 references. The RTL can be REG, offsettable MEM, integer constant, or
7775 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7776 split and "num" is its length. lo_half and hi_half are output arrays
7777 that parallel "operands". */
7780 split_ti (operands, num, lo_half, hi_half)
7783 rtx lo_half[], hi_half[];
7787 rtx op = operands[num];
7789 /* simplify_subreg refuse to split volatile memory addresses, but we
7790 still have to handle it. */
7791 if (GET_CODE (op) == MEM)
7793 lo_half[num] = adjust_address (op, DImode, 0);
7794 hi_half[num] = adjust_address (op, DImode, 8);
7798 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7799 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7804 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7805 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7806 is the expression of the binary operation. The output may either be
7807 emitted here, or returned to the caller, like all output_* functions.
7809 There is no guarantee that the operands are the same mode, as they
7810 might be within FLOAT or FLOAT_EXTEND expressions. */
7812 #ifndef SYSV386_COMPAT
7813 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7814 wants to fix the assemblers because that causes incompatibility
7815 with gcc. No-one wants to fix gcc because that causes
7816 incompatibility with assemblers... You can use the option of
7817 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7818 #define SYSV386_COMPAT 1
7822 output_387_binary_op (insn, operands)
7826 static char buf[30];
7829 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7831 #ifdef ENABLE_CHECKING
7832 /* Even if we do not want to check the inputs, this documents input
7833 constraints. Which helps in understanding the following code. */
7834 if (STACK_REG_P (operands[0])
7835 && ((REG_P (operands[1])
7836 && REGNO (operands[0]) == REGNO (operands[1])
7837 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7838 || (REG_P (operands[2])
7839 && REGNO (operands[0]) == REGNO (operands[2])
7840 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7841 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7847 switch (GET_CODE (operands[3]))
7850 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7851 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7859 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7860 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7868 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7869 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7877 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7878 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7892 if (GET_MODE (operands[0]) == SFmode)
7893 strcat (buf, "ss\t{%2, %0|%0, %2}");
7895 strcat (buf, "sd\t{%2, %0|%0, %2}");
7900 switch (GET_CODE (operands[3]))
7904 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7906 rtx temp = operands[2];
7907 operands[2] = operands[1];
7911 /* know operands[0] == operands[1]. */
7913 if (GET_CODE (operands[2]) == MEM)
7919 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7921 if (STACK_TOP_P (operands[0]))
7922 /* How is it that we are storing to a dead operand[2]?
7923 Well, presumably operands[1] is dead too. We can't
7924 store the result to st(0) as st(0) gets popped on this
7925 instruction. Instead store to operands[2] (which I
7926 think has to be st(1)). st(1) will be popped later.
7927 gcc <= 2.8.1 didn't have this check and generated
7928 assembly code that the Unixware assembler rejected. */
7929 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7931 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7935 if (STACK_TOP_P (operands[0]))
7936 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7938 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7943 if (GET_CODE (operands[1]) == MEM)
7949 if (GET_CODE (operands[2]) == MEM)
7955 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7958 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7959 derived assemblers, confusingly reverse the direction of
7960 the operation for fsub{r} and fdiv{r} when the
7961 destination register is not st(0). The Intel assembler
7962 doesn't have this brain damage. Read !SYSV386_COMPAT to
7963 figure out what the hardware really does. */
7964 if (STACK_TOP_P (operands[0]))
7965 p = "{p\t%0, %2|rp\t%2, %0}";
7967 p = "{rp\t%2, %0|p\t%0, %2}";
7969 if (STACK_TOP_P (operands[0]))
7970 /* As above for fmul/fadd, we can't store to st(0). */
7971 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7973 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7978 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7981 if (STACK_TOP_P (operands[0]))
7982 p = "{rp\t%0, %1|p\t%1, %0}";
7984 p = "{p\t%1, %0|rp\t%0, %1}";
7986 if (STACK_TOP_P (operands[0]))
7987 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7989 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7994 if (STACK_TOP_P (operands[0]))
7996 if (STACK_TOP_P (operands[1]))
7997 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7999 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8002 else if (STACK_TOP_P (operands[1]))
8005 p = "{\t%1, %0|r\t%0, %1}";
8007 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8013 p = "{r\t%2, %0|\t%0, %2}";
8015 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8028 /* Output code to initialize control word copies used by
8029 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
8030 is set to control word rounding downwards. */
8032 emit_i387_cw_initialization (normal, round_down)
8033 rtx normal, round_down;
8035 rtx reg = gen_reg_rtx (HImode);
8037 emit_insn (gen_x86_fnstcw_1 (normal));
8038 emit_move_insn (reg, normal);
8039 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
8041 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8043 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
8044 emit_move_insn (round_down, reg);
8047 /* Output code for INSN to convert a float to a signed int. OPERANDS
8048 are the insn operands. The output may be [HSD]Imode and the input
8049 operand may be [SDX]Fmode. */
8052 output_fix_trunc (insn, operands)
8056 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8057 int dimode_p = GET_MODE (operands[0]) == DImode;
8059 /* Jump through a hoop or two for DImode, since the hardware has no
8060 non-popping instruction. We used to do this a different way, but
8061 that was somewhat fragile and broke with post-reload splitters. */
8062 if (dimode_p && !stack_top_dies)
8063 output_asm_insn ("fld\t%y1", operands);
8065 if (!STACK_TOP_P (operands[1]))
8068 if (GET_CODE (operands[0]) != MEM)
8071 output_asm_insn ("fldcw\t%3", operands);
8072 if (stack_top_dies || dimode_p)
8073 output_asm_insn ("fistp%z0\t%0", operands);
8075 output_asm_insn ("fist%z0\t%0", operands);
8076 output_asm_insn ("fldcw\t%2", operands);
8081 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8082 should be used and 2 when fnstsw should be used. UNORDERED_P is true
8083 when fucom should be used. */
8086 output_fp_compare (insn, operands, eflags_p, unordered_p)
8089 int eflags_p, unordered_p;
8092 rtx cmp_op0 = operands[0];
8093 rtx cmp_op1 = operands[1];
8094 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
8099 cmp_op1 = operands[2];
8103 if (GET_MODE (operands[0]) == SFmode)
8105 return "ucomiss\t{%1, %0|%0, %1}";
8107 return "comiss\t{%1, %0|%0, %1}";
8110 return "ucomisd\t{%1, %0|%0, %1}";
8112 return "comisd\t{%1, %0|%0, %1}";
8115 if (! STACK_TOP_P (cmp_op0))
8118 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8120 if (STACK_REG_P (cmp_op1)
8122 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8123 && REGNO (cmp_op1) != FIRST_STACK_REG)
8125 /* If both the top of the 387 stack dies, and the other operand
8126 is also a stack register that dies, then this must be a
8127 `fcompp' float compare */
8131 /* There is no double popping fcomi variant. Fortunately,
8132 eflags is immune from the fstp's cc clobbering. */
8134 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8136 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8144 return "fucompp\n\tfnstsw\t%0";
8146 return "fcompp\n\tfnstsw\t%0";
8159 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8161 static const char * const alt[24] =
8173 "fcomi\t{%y1, %0|%0, %y1}",
8174 "fcomip\t{%y1, %0|%0, %y1}",
8175 "fucomi\t{%y1, %0|%0, %y1}",
8176 "fucomip\t{%y1, %0|%0, %y1}",
8183 "fcom%z2\t%y2\n\tfnstsw\t%0",
8184 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8185 "fucom%z2\t%y2\n\tfnstsw\t%0",
8186 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8188 "ficom%z2\t%y2\n\tfnstsw\t%0",
8189 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8197 mask = eflags_p << 3;
8198 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
8199 mask |= unordered_p << 1;
8200 mask |= stack_top_dies;
8213 ix86_output_addr_vec_elt (file, value)
8217 const char *directive = ASM_LONG;
8222 directive = ASM_QUAD;
8228 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8232 ix86_output_addr_diff_elt (file, value, rel)
8237 fprintf (file, "%s%s%d-%s%d\n",
8238 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8239 else if (HAVE_AS_GOTOFF_IN_DATA)
8240 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8242 else if (TARGET_MACHO)
8243 fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
8244 machopic_function_base_name () + 1);
8247 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8248 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8251 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8255 ix86_expand_clear (dest)
8260 /* We play register width games, which are only valid after reload. */
8261 if (!reload_completed)
8264 /* Avoid HImode and its attendant prefix byte. */
8265 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8266 dest = gen_rtx_REG (SImode, REGNO (dest));
8268 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8270 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8271 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8273 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8274 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8280 /* X is an unchanging MEM. If it is a constant pool reference, return
8281 the constant pool rtx, else NULL. */
8284 maybe_get_pool_constant (x)
8287 x = ix86_delegitimize_address (XEXP (x, 0));
8289 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8290 return get_pool_constant (x);
8296 ix86_expand_move (mode, operands)
8297 enum machine_mode mode;
8300 int strict = (reload_in_progress || reload_completed);
8302 enum tls_model model;
8307 model = tls_symbolic_operand (op1, Pmode);
8310 op1 = legitimize_tls_address (op1, model, true);
8311 op1 = force_operand (op1, op0);
8316 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8321 rtx temp = ((reload_in_progress
8322 || ((op0 && GET_CODE (op0) == REG)
8324 ? op0 : gen_reg_rtx (Pmode));
8325 op1 = machopic_indirect_data_reference (op1, temp);
8326 op1 = machopic_legitimize_pic_address (op1, mode,
8327 temp == op1 ? 0 : temp);
8329 else if (MACHOPIC_INDIRECT)
8330 op1 = machopic_indirect_data_reference (op1, 0);
8334 if (GET_CODE (op0) == MEM)
8335 op1 = force_reg (Pmode, op1);
8339 if (GET_CODE (temp) != REG)
8340 temp = gen_reg_rtx (Pmode);
8341 temp = legitimize_pic_address (op1, temp);
8346 #endif /* TARGET_MACHO */
8350 if (GET_CODE (op0) == MEM
8351 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8352 || !push_operand (op0, mode))
8353 && GET_CODE (op1) == MEM)
8354 op1 = force_reg (mode, op1);
8356 if (push_operand (op0, mode)
8357 && ! general_no_elim_operand (op1, mode))
8358 op1 = copy_to_mode_reg (mode, op1);
8360 /* Force large constants in 64bit compilation into register
8361 to get them CSEed. */
8362 if (TARGET_64BIT && mode == DImode
8363 && immediate_operand (op1, mode)
8364 && !x86_64_zero_extended_value (op1)
8365 && !register_operand (op0, mode)
8366 && optimize && !reload_completed && !reload_in_progress)
8367 op1 = copy_to_mode_reg (mode, op1);
8369 if (FLOAT_MODE_P (mode))
8371 /* If we are loading a floating point constant to a register,
8372 force the value to memory now, since we'll get better code
8373 out the back end. */
8377 else if (GET_CODE (op1) == CONST_DOUBLE)
8379 op1 = validize_mem (force_const_mem (mode, op1));
8380 if (!register_operand (op0, mode))
8382 rtx temp = gen_reg_rtx (mode);
8383 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8384 emit_move_insn (op0, temp);
8391 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
8395 ix86_expand_vector_move (mode, operands)
8396 enum machine_mode mode;
8399 /* Force constants other than zero into memory. We do not know how
8400 the instructions used to build constants modify the upper 64 bits
8401 of the register, once we have that information we may be able
8402 to handle some of them more efficiently. */
8403 if ((reload_in_progress | reload_completed) == 0
8404 && register_operand (operands[0], mode)
8405 && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
8406 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
8408 /* Make operand1 a register if it isn't already. */
8410 && !register_operand (operands[0], mode)
8411 && !register_operand (operands[1], mode))
8413 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8414 emit_move_insn (operands[0], temp);
8418 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8421 /* Attempt to expand a binary operator. Make the expansion closer to the
8422 actual machine, then just general_operand, which will allow 3 separate
8423 memory references (one output, two input) in a single insn. */
8426 ix86_expand_binary_operator (code, mode, operands)
8428 enum machine_mode mode;
8431 int matching_memory;
8432 rtx src1, src2, dst, op, clob;
8438 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8439 if (GET_RTX_CLASS (code) == 'c'
8440 && (rtx_equal_p (dst, src2)
8441 || immediate_operand (src1, mode)))
8448 /* If the destination is memory, and we do not have matching source
8449 operands, do things in registers. */
8450 matching_memory = 0;
8451 if (GET_CODE (dst) == MEM)
8453 if (rtx_equal_p (dst, src1))
8454 matching_memory = 1;
8455 else if (GET_RTX_CLASS (code) == 'c'
8456 && rtx_equal_p (dst, src2))
8457 matching_memory = 2;
8459 dst = gen_reg_rtx (mode);
8462 /* Both source operands cannot be in memory. */
8463 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8465 if (matching_memory != 2)
8466 src2 = force_reg (mode, src2);
8468 src1 = force_reg (mode, src1);
8471 /* If the operation is not commutable, source 1 cannot be a constant
8472 or non-matching memory. */
8473 if ((CONSTANT_P (src1)
8474 || (!matching_memory && GET_CODE (src1) == MEM))
8475 && GET_RTX_CLASS (code) != 'c')
8476 src1 = force_reg (mode, src1);
8478 /* If optimizing, copy to regs to improve CSE */
8479 if (optimize && ! no_new_pseudos)
8481 if (GET_CODE (dst) == MEM)
8482 dst = gen_reg_rtx (mode);
8483 if (GET_CODE (src1) == MEM)
8484 src1 = force_reg (mode, src1);
8485 if (GET_CODE (src2) == MEM)
8486 src2 = force_reg (mode, src2);
8489 /* Emit the instruction. */
8491 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8492 if (reload_in_progress)
8494 /* Reload doesn't know about the flags register, and doesn't know that
8495 it doesn't want to clobber it. We can only do this with PLUS. */
8502 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8503 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8506 /* Fix up the destination if needed. */
8507 if (dst != operands[0])
8508 emit_move_insn (operands[0], dst);
8511 /* Return TRUE or FALSE depending on whether the binary operator meets the
8512 appropriate constraints. */
8515 ix86_binary_operator_ok (code, mode, operands)
8517 enum machine_mode mode ATTRIBUTE_UNUSED;
8520 /* Both source operands cannot be in memory. */
8521 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8523 /* If the operation is not commutable, source 1 cannot be a constant. */
8524 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
8526 /* If the destination is memory, we must have a matching source operand. */
8527 if (GET_CODE (operands[0]) == MEM
8528 && ! (rtx_equal_p (operands[0], operands[1])
8529 || (GET_RTX_CLASS (code) == 'c'
8530 && rtx_equal_p (operands[0], operands[2]))))
8532 /* If the operation is not commutable and the source 1 is memory, we must
8533 have a matching destination. */
8534 if (GET_CODE (operands[1]) == MEM
8535 && GET_RTX_CLASS (code) != 'c'
8536 && ! rtx_equal_p (operands[0], operands[1]))
8541 /* Attempt to expand a unary operator. Make the expansion closer to the
8542 actual machine, then just general_operand, which will allow 2 separate
8543 memory references (one output, one input) in a single insn. */
8546 ix86_expand_unary_operator (code, mode, operands)
8548 enum machine_mode mode;
8551 int matching_memory;
8552 rtx src, dst, op, clob;
8557 /* If the destination is memory, and we do not have matching source
8558 operands, do things in registers. */
8559 matching_memory = 0;
8560 if (GET_CODE (dst) == MEM)
8562 if (rtx_equal_p (dst, src))
8563 matching_memory = 1;
8565 dst = gen_reg_rtx (mode);
8568 /* When source operand is memory, destination must match. */
8569 if (!matching_memory && GET_CODE (src) == MEM)
8570 src = force_reg (mode, src);
8572 /* If optimizing, copy to regs to improve CSE */
8573 if (optimize && ! no_new_pseudos)
8575 if (GET_CODE (dst) == MEM)
8576 dst = gen_reg_rtx (mode);
8577 if (GET_CODE (src) == MEM)
8578 src = force_reg (mode, src);
8581 /* Emit the instruction. */
8583 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8584 if (reload_in_progress || code == NOT)
8586 /* Reload doesn't know about the flags register, and doesn't know that
8587 it doesn't want to clobber it. */
8594 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8595 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8598 /* Fix up the destination if needed. */
8599 if (dst != operands[0])
8600 emit_move_insn (operands[0], dst);
8603 /* Return TRUE or FALSE depending on whether the unary operator meets the
8604 appropriate constraints. */
8607 ix86_unary_operator_ok (code, mode, operands)
8608 enum rtx_code code ATTRIBUTE_UNUSED;
8609 enum machine_mode mode ATTRIBUTE_UNUSED;
8610 rtx operands[2] ATTRIBUTE_UNUSED;
8612 /* If one of operands is memory, source and destination must match. */
8613 if ((GET_CODE (operands[0]) == MEM
8614 || GET_CODE (operands[1]) == MEM)
8615 && ! rtx_equal_p (operands[0], operands[1]))
8620 /* Return TRUE or FALSE depending on whether the first SET in INSN
8621 has source and destination with matching CC modes, and that the
8622 CC mode is at least as constrained as REQ_MODE. */
8625 ix86_match_ccmode (insn, req_mode)
8627 enum machine_mode req_mode;
8630 enum machine_mode set_mode;
8632 set = PATTERN (insn);
8633 if (GET_CODE (set) == PARALLEL)
8634 set = XVECEXP (set, 0, 0);
8635 if (GET_CODE (set) != SET)
8637 if (GET_CODE (SET_SRC (set)) != COMPARE)
8640 set_mode = GET_MODE (SET_DEST (set));
8644 if (req_mode != CCNOmode
8645 && (req_mode != CCmode
8646 || XEXP (SET_SRC (set), 1) != const0_rtx))
8650 if (req_mode == CCGCmode)
8654 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8658 if (req_mode == CCZmode)
8668 return (GET_MODE (SET_SRC (set)) == set_mode);
8671 /* Generate insn patterns to do an integer compare of OPERANDS. */
8674 ix86_expand_int_compare (code, op0, op1)
8678 enum machine_mode cmpmode;
8681 cmpmode = SELECT_CC_MODE (code, op0, op1);
8682 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8684 /* This is very simple, but making the interface the same as in the
8685 FP case makes the rest of the code easier. */
8686 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8687 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8689 /* Return the test that should be put into the flags user, i.e.
8690 the bcc, scc, or cmov instruction. */
8691 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8694 /* Figure out whether to use ordered or unordered fp comparisons.
8695 Return the appropriate mode to use. */
8698 ix86_fp_compare_mode (code)
8699 enum rtx_code code ATTRIBUTE_UNUSED;
8701 /* ??? In order to make all comparisons reversible, we do all comparisons
8702 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8703 all forms trapping and nontrapping comparisons, we can make inequality
8704 comparisons trapping again, since it results in better code when using
8705 FCOM based compares. */
8706 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8710 ix86_cc_mode (code, op0, op1)
8714 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8715 return ix86_fp_compare_mode (code);
8718 /* Only zero flag is needed. */
8720 case NE: /* ZF!=0 */
8722 /* Codes needing carry flag. */
8723 case GEU: /* CF=0 */
8724 case GTU: /* CF=0 & ZF=0 */
8725 case LTU: /* CF=1 */
8726 case LEU: /* CF=1 | ZF=1 */
8728 /* Codes possibly doable only with sign flag when
8729 comparing against zero. */
8730 case GE: /* SF=OF or SF=0 */
8731 case LT: /* SF<>OF or SF=1 */
8732 if (op1 == const0_rtx)
8735 /* For other cases Carry flag is not required. */
8737 /* Codes doable only with sign flag when comparing
8738 against zero, but we miss jump instruction for it
8739 so we need to use relational tests against overflow
8740 that thus needs to be zero. */
8741 case GT: /* ZF=0 & SF=OF */
8742 case LE: /* ZF=1 | SF<>OF */
8743 if (op1 == const0_rtx)
8747 /* strcmp pattern do (use flags) and combine may ask us for proper
8756 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8759 ix86_use_fcomi_compare (code)
8760 enum rtx_code code ATTRIBUTE_UNUSED;
8762 enum rtx_code swapped_code = swap_condition (code);
8763 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8764 || (ix86_fp_comparison_cost (swapped_code)
8765 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8768 /* Swap, force into registers, or otherwise massage the two operands
8769 to a fp comparison. The operands are updated in place; the new
8770 comparison code is returned. */
8772 static enum rtx_code
8773 ix86_prepare_fp_compare_args (code, pop0, pop1)
8777 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8778 rtx op0 = *pop0, op1 = *pop1;
8779 enum machine_mode op_mode = GET_MODE (op0);
8780 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8782 /* All of the unordered compare instructions only work on registers.
8783 The same is true of the XFmode compare instructions. The same is
8784 true of the fcomi compare instructions. */
8787 && (fpcmp_mode == CCFPUmode
8788 || op_mode == XFmode
8789 || op_mode == TFmode
8790 || ix86_use_fcomi_compare (code)))
8792 op0 = force_reg (op_mode, op0);
8793 op1 = force_reg (op_mode, op1);
8797 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8798 things around if they appear profitable, otherwise force op0
8801 if (standard_80387_constant_p (op0) == 0
8802 || (GET_CODE (op0) == MEM
8803 && ! (standard_80387_constant_p (op1) == 0
8804 || GET_CODE (op1) == MEM)))
8807 tmp = op0, op0 = op1, op1 = tmp;
8808 code = swap_condition (code);
8811 if (GET_CODE (op0) != REG)
8812 op0 = force_reg (op_mode, op0);
8814 if (CONSTANT_P (op1))
8816 if (standard_80387_constant_p (op1))
8817 op1 = force_reg (op_mode, op1);
8819 op1 = validize_mem (force_const_mem (op_mode, op1));
8823 /* Try to rearrange the comparison to make it cheaper. */
8824 if (ix86_fp_comparison_cost (code)
8825 > ix86_fp_comparison_cost (swap_condition (code))
8826 && (GET_CODE (op1) == REG || !no_new_pseudos))
8829 tmp = op0, op0 = op1, op1 = tmp;
8830 code = swap_condition (code);
8831 if (GET_CODE (op0) != REG)
8832 op0 = force_reg (op_mode, op0);
8840 /* Convert comparison codes we use to represent FP comparison to integer
8841 code that will result in proper branch. Return UNKNOWN if no such code
8843 static enum rtx_code
8844 ix86_fp_compare_code_to_integer (code)
8874 /* Split comparison code CODE into comparisons we can do using branch
8875 instructions. BYPASS_CODE is comparison code for branch that will
8876 branch around FIRST_CODE and SECOND_CODE. If some of branches
8877 is not required, set value to NIL.
8878 We never require more than two branches. */
8880 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
8881 enum rtx_code code, *bypass_code, *first_code, *second_code;
8887 /* The fcomi comparison sets flags as follows:
8897 case GT: /* GTU - CF=0 & ZF=0 */
8898 case GE: /* GEU - CF=0 */
8899 case ORDERED: /* PF=0 */
8900 case UNORDERED: /* PF=1 */
8901 case UNEQ: /* EQ - ZF=1 */
8902 case UNLT: /* LTU - CF=1 */
8903 case UNLE: /* LEU - CF=1 | ZF=1 */
8904 case LTGT: /* EQ - ZF=0 */
8906 case LT: /* LTU - CF=1 - fails on unordered */
8908 *bypass_code = UNORDERED;
8910 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8912 *bypass_code = UNORDERED;
8914 case EQ: /* EQ - ZF=1 - fails on unordered */
8916 *bypass_code = UNORDERED;
8918 case NE: /* NE - ZF=0 - fails on unordered */
8920 *second_code = UNORDERED;
8922 case UNGE: /* GEU - CF=0 - fails on unordered */
8924 *second_code = UNORDERED;
8926 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8928 *second_code = UNORDERED;
8933 if (!TARGET_IEEE_FP)
8940 /* Return cost of comparison done fcom + arithmetics operations on AX.
8941 All following functions do use number of instructions as a cost metrics.
8942 In future this should be tweaked to compute bytes for optimize_size and
8943 take into account performance of various instructions on various CPUs. */
8945 ix86_fp_comparison_arithmetics_cost (code)
8948 if (!TARGET_IEEE_FP)
8950 /* The cost of code output by ix86_expand_fp_compare. */
8978 /* Return cost of comparison done using fcomi operation.
8979 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8981 ix86_fp_comparison_fcomi_cost (code)
8984 enum rtx_code bypass_code, first_code, second_code;
8985 /* Return arbitrarily high cost when instruction is not supported - this
8986 prevents gcc from using it. */
8989 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8990 return (bypass_code != NIL || second_code != NIL) + 2;
8993 /* Return cost of comparison done using sahf operation.
8994 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8996 ix86_fp_comparison_sahf_cost (code)
8999 enum rtx_code bypass_code, first_code, second_code;
9000 /* Return arbitrarily high cost when instruction is not preferred - this
9001 avoids gcc from using it. */
9002 if (!TARGET_USE_SAHF && !optimize_size)
9004 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9005 return (bypass_code != NIL || second_code != NIL) + 3;
9008 /* Compute cost of the comparison done using any method.
9009 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9011 ix86_fp_comparison_cost (code)
9014 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
9017 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
9018 sahf_cost = ix86_fp_comparison_sahf_cost (code);
9020 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
9021 if (min > sahf_cost)
9023 if (min > fcomi_cost)
9028 /* Generate insn patterns to do a floating point compare of OPERANDS. */
9031 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
9033 rtx op0, op1, scratch;
9037 enum machine_mode fpcmp_mode, intcmp_mode;
9039 int cost = ix86_fp_comparison_cost (code);
9040 enum rtx_code bypass_code, first_code, second_code;
9042 fpcmp_mode = ix86_fp_compare_mode (code);
9043 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
9046 *second_test = NULL_RTX;
9048 *bypass_test = NULL_RTX;
9050 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9052 /* Do fcomi/sahf based test when profitable. */
9053 if ((bypass_code == NIL || bypass_test)
9054 && (second_code == NIL || second_test)
9055 && ix86_fp_comparison_arithmetics_cost (code) > cost)
9059 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9060 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
9066 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9067 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9069 scratch = gen_reg_rtx (HImode);
9070 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9071 emit_insn (gen_x86_sahf_1 (scratch));
9074 /* The FP codes work out to act like unsigned. */
9075 intcmp_mode = fpcmp_mode;
9077 if (bypass_code != NIL)
9078 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
9079 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9081 if (second_code != NIL)
9082 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
9083 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9088 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
9089 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9090 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9092 scratch = gen_reg_rtx (HImode);
9093 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9095 /* In the unordered case, we have to check C2 for NaN's, which
9096 doesn't happen to work out to anything nice combination-wise.
9097 So do some bit twiddling on the value we've got in AH to come
9098 up with an appropriate set of condition codes. */
9100 intcmp_mode = CCNOmode;
9105 if (code == GT || !TARGET_IEEE_FP)
9107 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9112 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9113 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9114 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
9115 intcmp_mode = CCmode;
9121 if (code == LT && TARGET_IEEE_FP)
9123 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9124 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
9125 intcmp_mode = CCmode;
9130 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
9136 if (code == GE || !TARGET_IEEE_FP)
9138 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
9143 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9144 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9151 if (code == LE && TARGET_IEEE_FP)
9153 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9154 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9155 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9156 intcmp_mode = CCmode;
9161 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9167 if (code == EQ && TARGET_IEEE_FP)
9169 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9170 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9171 intcmp_mode = CCmode;
9176 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9183 if (code == NE && TARGET_IEEE_FP)
9185 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9186 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9192 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9198 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9202 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9211 /* Return the test that should be put into the flags user, i.e.
9212 the bcc, scc, or cmov instruction. */
9213 return gen_rtx_fmt_ee (code, VOIDmode,
9214 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9219 ix86_expand_compare (code, second_test, bypass_test)
9221 rtx *second_test, *bypass_test;
9224 op0 = ix86_compare_op0;
9225 op1 = ix86_compare_op1;
9228 *second_test = NULL_RTX;
9230 *bypass_test = NULL_RTX;
9232 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9233 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9234 second_test, bypass_test);
9236 ret = ix86_expand_int_compare (code, op0, op1);
9241 /* Return true if the CODE will result in nontrivial jump sequence. */
9243 ix86_fp_jump_nontrivial_p (code)
9246 enum rtx_code bypass_code, first_code, second_code;
9249 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9250 return bypass_code != NIL || second_code != NIL;
9254 ix86_expand_branch (code, label)
9260 switch (GET_MODE (ix86_compare_op0))
9266 tmp = ix86_expand_compare (code, NULL, NULL);
9267 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9268 gen_rtx_LABEL_REF (VOIDmode, label),
9270 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9280 enum rtx_code bypass_code, first_code, second_code;
9282 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9285 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9287 /* Check whether we will use the natural sequence with one jump. If
9288 so, we can expand jump early. Otherwise delay expansion by
9289 creating compound insn to not confuse optimizers. */
9290 if (bypass_code == NIL && second_code == NIL
9293 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9294 gen_rtx_LABEL_REF (VOIDmode, label),
9299 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9300 ix86_compare_op0, ix86_compare_op1);
9301 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9302 gen_rtx_LABEL_REF (VOIDmode, label),
9304 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9306 use_fcomi = ix86_use_fcomi_compare (code);
9307 vec = rtvec_alloc (3 + !use_fcomi);
9308 RTVEC_ELT (vec, 0) = tmp;
9310 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9312 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9315 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9317 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9325 /* Expand DImode branch into multiple compare+branch. */
9327 rtx lo[2], hi[2], label2;
9328 enum rtx_code code1, code2, code3;
9330 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9332 tmp = ix86_compare_op0;
9333 ix86_compare_op0 = ix86_compare_op1;
9334 ix86_compare_op1 = tmp;
9335 code = swap_condition (code);
9337 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9338 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9340 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9341 avoid two branches. This costs one extra insn, so disable when
9342 optimizing for size. */
9344 if ((code == EQ || code == NE)
9346 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9351 if (hi[1] != const0_rtx)
9352 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9353 NULL_RTX, 0, OPTAB_WIDEN);
9356 if (lo[1] != const0_rtx)
9357 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9358 NULL_RTX, 0, OPTAB_WIDEN);
9360 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9361 NULL_RTX, 0, OPTAB_WIDEN);
9363 ix86_compare_op0 = tmp;
9364 ix86_compare_op1 = const0_rtx;
9365 ix86_expand_branch (code, label);
9369 /* Otherwise, if we are doing less-than or greater-or-equal-than,
9370 op1 is a constant and the low word is zero, then we can just
9371 examine the high word. */
9373 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9376 case LT: case LTU: case GE: case GEU:
9377 ix86_compare_op0 = hi[0];
9378 ix86_compare_op1 = hi[1];
9379 ix86_expand_branch (code, label);
9385 /* Otherwise, we need two or three jumps. */
9387 label2 = gen_label_rtx ();
9390 code2 = swap_condition (code);
9391 code3 = unsigned_condition (code);
9395 case LT: case GT: case LTU: case GTU:
9398 case LE: code1 = LT; code2 = GT; break;
9399 case GE: code1 = GT; code2 = LT; break;
9400 case LEU: code1 = LTU; code2 = GTU; break;
9401 case GEU: code1 = GTU; code2 = LTU; break;
9403 case EQ: code1 = NIL; code2 = NE; break;
9404 case NE: code2 = NIL; break;
9412 * if (hi(a) < hi(b)) goto true;
9413 * if (hi(a) > hi(b)) goto false;
9414 * if (lo(a) < lo(b)) goto true;
9418 ix86_compare_op0 = hi[0];
9419 ix86_compare_op1 = hi[1];
9422 ix86_expand_branch (code1, label);
9424 ix86_expand_branch (code2, label2);
9426 ix86_compare_op0 = lo[0];
9427 ix86_compare_op1 = lo[1];
9428 ix86_expand_branch (code3, label);
9431 emit_label (label2);
9440 /* Split branch based on floating point condition. */
9442 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
9444 rtx op1, op2, target1, target2, tmp;
9447 rtx label = NULL_RTX;
9449 int bypass_probability = -1, second_probability = -1, probability = -1;
9452 if (target2 != pc_rtx)
9455 code = reverse_condition_maybe_unordered (code);
9460 condition = ix86_expand_fp_compare (code, op1, op2,
9461 tmp, &second, &bypass);
9463 if (split_branch_probability >= 0)
9465 /* Distribute the probabilities across the jumps.
9466 Assume the BYPASS and SECOND to be always test
9468 probability = split_branch_probability;
9470 /* Value of 1 is low enough to make no need for probability
9471 to be updated. Later we may run some experiments and see
9472 if unordered values are more frequent in practice. */
9474 bypass_probability = 1;
9476 second_probability = 1;
9478 if (bypass != NULL_RTX)
9480 label = gen_label_rtx ();
9481 i = emit_jump_insn (gen_rtx_SET
9483 gen_rtx_IF_THEN_ELSE (VOIDmode,
9485 gen_rtx_LABEL_REF (VOIDmode,
9488 if (bypass_probability >= 0)
9490 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9491 GEN_INT (bypass_probability),
9494 i = emit_jump_insn (gen_rtx_SET
9496 gen_rtx_IF_THEN_ELSE (VOIDmode,
9497 condition, target1, target2)));
9498 if (probability >= 0)
9500 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9501 GEN_INT (probability),
9503 if (second != NULL_RTX)
9505 i = emit_jump_insn (gen_rtx_SET
9507 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9509 if (second_probability >= 0)
9511 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9512 GEN_INT (second_probability),
9515 if (label != NULL_RTX)
9520 ix86_expand_setcc (code, dest)
9524 rtx ret, tmp, tmpreg;
9525 rtx second_test, bypass_test;
9527 if (GET_MODE (ix86_compare_op0) == DImode
9529 return 0; /* FAIL */
9531 if (GET_MODE (dest) != QImode)
9534 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9535 PUT_MODE (ret, QImode);
9540 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9541 if (bypass_test || second_test)
9543 rtx test = second_test;
9545 rtx tmp2 = gen_reg_rtx (QImode);
9552 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9554 PUT_MODE (test, QImode);
9555 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9558 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9560 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9563 return 1; /* DONE */
9566 /* Expand comparison setting or clearing carry flag. Return true when successful
9567 and set pop for the operation. */
9569 ix86_expand_carry_flag_compare (code, op0, op1, pop)
9573 enum machine_mode mode =
9574 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9576 /* Do not handle DImode compares that go trought special path. Also we can't
9577 deal with FP compares yet. This is possible to add. */
9578 if ((mode == DImode && !TARGET_64BIT))
9580 if (FLOAT_MODE_P (mode))
9582 rtx second_test = NULL, bypass_test = NULL;
9583 rtx compare_op, compare_seq;
9585 /* Shortcut: following common codes never translate into carry flag compares. */
9586 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9587 || code == ORDERED || code == UNORDERED)
9590 /* These comparisons require zero flag; swap operands so they won't. */
9591 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9597 code = swap_condition (code);
9600 /* Try to expand the comparsion and verify that we end up with carry flag
9601 based comparsion. This is fails to be true only when we decide to expand
9602 comparsion using arithmetic that is not too common scenario. */
9604 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9605 &second_test, &bypass_test);
9606 compare_seq = get_insns ();
9609 if (second_test || bypass_test)
9611 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9612 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9613 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9615 code = GET_CODE (compare_op);
9616 if (code != LTU && code != GEU)
9618 emit_insn (compare_seq);
9622 if (!INTEGRAL_MODE_P (mode))
9630 /* Convert a==0 into (unsigned)a<1. */
9633 if (op1 != const0_rtx)
9636 code = (code == EQ ? LTU : GEU);
9639 /* Convert a>b into b<a or a>=b-1. */
9642 if (GET_CODE (op1) == CONST_INT)
9644 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9645 /* Bail out on overflow. We still can swap operands but that
9646 would force loading of the constant into register. */
9647 if (op1 == const0_rtx
9648 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9650 code = (code == GTU ? GEU : LTU);
9657 code = (code == GTU ? LTU : GEU);
9661 /* Convert a>0 into (unsigned)a<0x7fffffff. */
9664 if (mode == DImode || op1 != const0_rtx)
9666 op1 = gen_int_mode (~(1 << (GET_MODE_BITSIZE (mode) - 1)), mode);
9667 code = (code == LT ? GEU : LTU);
9671 if (mode == DImode || op1 != constm1_rtx)
9673 op1 = gen_int_mode (~(1 << (GET_MODE_BITSIZE (mode) - 1)), mode);
9674 code = (code == LE ? GEU : LTU);
9680 ix86_compare_op0 = op0;
9681 ix86_compare_op1 = op1;
9682 *pop = ix86_expand_compare (code, NULL, NULL);
9683 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9689 ix86_expand_int_movcc (operands)
9692 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9693 rtx compare_seq, compare_op;
9694 rtx second_test, bypass_test;
9695 enum machine_mode mode = GET_MODE (operands[0]);
9696 bool sign_bit_compare_p = false;;
9699 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9700 compare_seq = get_insns ();
9703 compare_code = GET_CODE (compare_op);
9705 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9706 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9707 sign_bit_compare_p = true;
9709 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9710 HImode insns, we'd be swallowed in word prefix ops. */
9712 if ((mode != HImode || TARGET_FAST_PREFIX)
9713 && (mode != DImode || TARGET_64BIT)
9714 && GET_CODE (operands[2]) == CONST_INT
9715 && GET_CODE (operands[3]) == CONST_INT)
9717 rtx out = operands[0];
9718 HOST_WIDE_INT ct = INTVAL (operands[2]);
9719 HOST_WIDE_INT cf = INTVAL (operands[3]);
9723 /* Sign bit compares are better done using shifts than we do by using
9725 if (sign_bit_compare_p
9726 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9727 ix86_compare_op1, &compare_op))
9729 /* Detect overlap between destination and compare sources. */
9732 if (!sign_bit_compare_p)
9736 compare_code = GET_CODE (compare_op);
9738 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9739 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9742 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9745 /* To simplify rest of code, restrict to the GEU case. */
9746 if (compare_code == LTU)
9748 HOST_WIDE_INT tmp = ct;
9751 compare_code = reverse_condition (compare_code);
9752 code = reverse_condition (code);
9757 PUT_CODE (compare_op,
9758 reverse_condition_maybe_unordered
9759 (GET_CODE (compare_op)));
9761 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9765 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9766 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9767 tmp = gen_reg_rtx (mode);
9770 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9772 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9776 if (code == GT || code == GE)
9777 code = reverse_condition (code);
9780 HOST_WIDE_INT tmp = ct;
9785 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9786 ix86_compare_op1, VOIDmode, 0, -1);
9799 tmp = expand_simple_binop (mode, PLUS,
9801 copy_rtx (tmp), 1, OPTAB_DIRECT);
9812 tmp = expand_simple_binop (mode, IOR,
9814 copy_rtx (tmp), 1, OPTAB_DIRECT);
9816 else if (diff == -1 && ct)
9826 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9828 tmp = expand_simple_binop (mode, PLUS,
9829 copy_rtx (tmp), GEN_INT (cf),
9830 copy_rtx (tmp), 1, OPTAB_DIRECT);
9838 * andl cf - ct, dest
9848 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9851 tmp = expand_simple_binop (mode, AND,
9853 gen_int_mode (cf - ct, mode),
9854 copy_rtx (tmp), 1, OPTAB_DIRECT);
9856 tmp = expand_simple_binop (mode, PLUS,
9857 copy_rtx (tmp), GEN_INT (ct),
9858 copy_rtx (tmp), 1, OPTAB_DIRECT);
9861 if (!rtx_equal_p (tmp, out))
9862 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9864 return 1; /* DONE */
9870 tmp = ct, ct = cf, cf = tmp;
9872 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9874 /* We may be reversing unordered compare to normal compare, that
9875 is not valid in general (we may convert non-trapping condition
9876 to trapping one), however on i386 we currently emit all
9877 comparisons unordered. */
9878 compare_code = reverse_condition_maybe_unordered (compare_code);
9879 code = reverse_condition_maybe_unordered (code);
9883 compare_code = reverse_condition (compare_code);
9884 code = reverse_condition (code);
9889 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9890 && GET_CODE (ix86_compare_op1) == CONST_INT)
9892 if (ix86_compare_op1 == const0_rtx
9893 && (code == LT || code == GE))
9894 compare_code = code;
9895 else if (ix86_compare_op1 == constm1_rtx)
9899 else if (code == GT)
9904 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9905 if (compare_code != NIL
9906 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9907 && (cf == -1 || ct == -1))
9909 /* If lea code below could be used, only optimize
9910 if it results in a 2 insn sequence. */
9912 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9913 || diff == 3 || diff == 5 || diff == 9)
9914 || (compare_code == LT && ct == -1)
9915 || (compare_code == GE && cf == -1))
9918 * notl op1 (if necessary)
9926 code = reverse_condition (code);
9929 out = emit_store_flag (out, code, ix86_compare_op0,
9930 ix86_compare_op1, VOIDmode, 0, -1);
9932 out = expand_simple_binop (mode, IOR,
9934 out, 1, OPTAB_DIRECT);
9935 if (out != operands[0])
9936 emit_move_insn (operands[0], out);
9938 return 1; /* DONE */
9943 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9944 || diff == 3 || diff == 5 || diff == 9)
9945 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9946 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
9952 * lea cf(dest*(ct-cf)),dest
9956 * This also catches the degenerate setcc-only case.
9962 out = emit_store_flag (out, code, ix86_compare_op0,
9963 ix86_compare_op1, VOIDmode, 0, 1);
9966 /* On x86_64 the lea instruction operates on Pmode, so we need
9967 to get arithmetics done in proper mode to match. */
9969 tmp = copy_rtx (out);
9973 out1 = copy_rtx (out);
9974 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9978 tmp = gen_rtx_PLUS (mode, tmp, out1);
9984 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9987 if (!rtx_equal_p (tmp, out))
9990 out = force_operand (tmp, copy_rtx (out));
9992 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
9994 if (!rtx_equal_p (out, operands[0]))
9995 emit_move_insn (operands[0], copy_rtx (out));
9997 return 1; /* DONE */
10001 * General case: Jumpful:
10002 * xorl dest,dest cmpl op1, op2
10003 * cmpl op1, op2 movl ct, dest
10004 * setcc dest jcc 1f
10005 * decl dest movl cf, dest
10006 * andl (cf-ct),dest 1:
10009 * Size 20. Size 14.
10011 * This is reasonably steep, but branch mispredict costs are
10012 * high on modern cpus, so consider failing only if optimizing
10016 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10017 && BRANCH_COST >= 2)
10023 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10024 /* We may be reversing unordered compare to normal compare,
10025 that is not valid in general (we may convert non-trapping
10026 condition to trapping one), however on i386 we currently
10027 emit all comparisons unordered. */
10028 code = reverse_condition_maybe_unordered (code);
10031 code = reverse_condition (code);
10032 if (compare_code != NIL)
10033 compare_code = reverse_condition (compare_code);
10037 if (compare_code != NIL)
10039 /* notl op1 (if needed)
10044 For x < 0 (resp. x <= -1) there will be no notl,
10045 so if possible swap the constants to get rid of the
10047 True/false will be -1/0 while code below (store flag
10048 followed by decrement) is 0/-1, so the constants need
10049 to be exchanged once more. */
10051 if (compare_code == GE || !cf)
10053 code = reverse_condition (code);
10058 HOST_WIDE_INT tmp = cf;
10063 out = emit_store_flag (out, code, ix86_compare_op0,
10064 ix86_compare_op1, VOIDmode, 0, -1);
10068 out = emit_store_flag (out, code, ix86_compare_op0,
10069 ix86_compare_op1, VOIDmode, 0, 1);
10071 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
10072 copy_rtx (out), 1, OPTAB_DIRECT);
10075 out = expand_simple_binop (mode, AND, copy_rtx (out),
10076 gen_int_mode (cf - ct, mode),
10077 copy_rtx (out), 1, OPTAB_DIRECT);
10079 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
10080 copy_rtx (out), 1, OPTAB_DIRECT);
10081 if (!rtx_equal_p (out, operands[0]))
10082 emit_move_insn (operands[0], copy_rtx (out));
10084 return 1; /* DONE */
10088 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10090 /* Try a few things more with specific constants and a variable. */
10093 rtx var, orig_out, out, tmp;
10095 if (BRANCH_COST <= 2)
10096 return 0; /* FAIL */
10098 /* If one of the two operands is an interesting constant, load a
10099 constant with the above and mask it in with a logical operation. */
10101 if (GET_CODE (operands[2]) == CONST_INT)
10104 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
10105 operands[3] = constm1_rtx, op = and_optab;
10106 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
10107 operands[3] = const0_rtx, op = ior_optab;
10109 return 0; /* FAIL */
10111 else if (GET_CODE (operands[3]) == CONST_INT)
10114 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
10115 operands[2] = constm1_rtx, op = and_optab;
10116 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
10117 operands[2] = const0_rtx, op = ior_optab;
10119 return 0; /* FAIL */
10122 return 0; /* FAIL */
10124 orig_out = operands[0];
10125 tmp = gen_reg_rtx (mode);
10128 /* Recurse to get the constant loaded. */
10129 if (ix86_expand_int_movcc (operands) == 0)
10130 return 0; /* FAIL */
10132 /* Mask in the interesting variable. */
10133 out = expand_binop (mode, op, var, tmp, orig_out, 0,
10135 if (!rtx_equal_p (out, orig_out))
10136 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
10138 return 1; /* DONE */
10142 * For comparison with above,
10152 if (! nonimmediate_operand (operands[2], mode))
10153 operands[2] = force_reg (mode, operands[2]);
10154 if (! nonimmediate_operand (operands[3], mode))
10155 operands[3] = force_reg (mode, operands[3]);
10157 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10159 rtx tmp = gen_reg_rtx (mode);
10160 emit_move_insn (tmp, operands[3]);
10163 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10165 rtx tmp = gen_reg_rtx (mode);
10166 emit_move_insn (tmp, operands[2]);
10170 if (! register_operand (operands[2], VOIDmode)
10172 || ! register_operand (operands[3], VOIDmode)))
10173 operands[2] = force_reg (mode, operands[2]);
10176 && ! register_operand (operands[3], VOIDmode))
10177 operands[3] = force_reg (mode, operands[3]);
10179 emit_insn (compare_seq);
10180 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10181 gen_rtx_IF_THEN_ELSE (mode,
10182 compare_op, operands[2],
10185 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10186 gen_rtx_IF_THEN_ELSE (mode,
10188 copy_rtx (operands[3]),
10189 copy_rtx (operands[0]))));
10191 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10192 gen_rtx_IF_THEN_ELSE (mode,
10194 copy_rtx (operands[2]),
10195 copy_rtx (operands[0]))));
10197 return 1; /* DONE */
10201 ix86_expand_fp_movcc (operands)
10204 enum rtx_code code;
10206 rtx compare_op, second_test, bypass_test;
10208 /* For SF/DFmode conditional moves based on comparisons
10209 in same mode, we may want to use SSE min/max instructions. */
10210 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
10211 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
10212 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
10213 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
10214 && (!TARGET_IEEE_FP
10215 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
10216 /* We may be called from the post-reload splitter. */
10217 && (!REG_P (operands[0])
10218 || SSE_REG_P (operands[0])
10219 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
10221 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
10222 code = GET_CODE (operands[1]);
10224 /* See if we have (cross) match between comparison operands and
10225 conditional move operands. */
10226 if (rtx_equal_p (operands[2], op1))
10231 code = reverse_condition_maybe_unordered (code);
10233 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
10235 /* Check for min operation. */
10236 if (code == LT || code == UNLE)
10244 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10245 if (memory_operand (op0, VOIDmode))
10246 op0 = force_reg (GET_MODE (operands[0]), op0);
10247 if (GET_MODE (operands[0]) == SFmode)
10248 emit_insn (gen_minsf3 (operands[0], op0, op1));
10250 emit_insn (gen_mindf3 (operands[0], op0, op1));
10253 /* Check for max operation. */
10254 if (code == GT || code == UNGE)
10262 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10263 if (memory_operand (op0, VOIDmode))
10264 op0 = force_reg (GET_MODE (operands[0]), op0);
10265 if (GET_MODE (operands[0]) == SFmode)
10266 emit_insn (gen_maxsf3 (operands[0], op0, op1));
10268 emit_insn (gen_maxdf3 (operands[0], op0, op1));
10272 /* Manage condition to be sse_comparison_operator. In case we are
10273 in non-ieee mode, try to canonicalize the destination operand
10274 to be first in the comparison - this helps reload to avoid extra
10276 if (!sse_comparison_operator (operands[1], VOIDmode)
10277 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
10279 rtx tmp = ix86_compare_op0;
10280 ix86_compare_op0 = ix86_compare_op1;
10281 ix86_compare_op1 = tmp;
10282 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
10283 VOIDmode, ix86_compare_op0,
10286 /* Similarly try to manage result to be first operand of conditional
10287 move. We also don't support the NE comparison on SSE, so try to
10289 if ((rtx_equal_p (operands[0], operands[3])
10290 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
10291 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
10293 rtx tmp = operands[2];
10294 operands[2] = operands[3];
10296 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10297 (GET_CODE (operands[1])),
10298 VOIDmode, ix86_compare_op0,
10301 if (GET_MODE (operands[0]) == SFmode)
10302 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
10303 operands[2], operands[3],
10304 ix86_compare_op0, ix86_compare_op1));
10306 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
10307 operands[2], operands[3],
10308 ix86_compare_op0, ix86_compare_op1));
10312 /* The floating point conditional move instructions don't directly
10313 support conditions resulting from a signed integer comparison. */
10315 code = GET_CODE (operands[1]);
10316 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10318 /* The floating point conditional move instructions don't directly
10319 support signed integer comparisons. */
10321 if (!fcmov_comparison_operator (compare_op, VOIDmode))
10323 if (second_test != NULL || bypass_test != NULL)
10325 tmp = gen_reg_rtx (QImode);
10326 ix86_expand_setcc (code, tmp);
10328 ix86_compare_op0 = tmp;
10329 ix86_compare_op1 = const0_rtx;
10330 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10332 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10334 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10335 emit_move_insn (tmp, operands[3]);
10338 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10340 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10341 emit_move_insn (tmp, operands[2]);
10345 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10346 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10351 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10352 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10357 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10358 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10366 /* Expand conditional increment or decrement using adb/sbb instructions.
10367 The default case using setcc followed by the conditional move can be
10368 done by generic code. */
10370 ix86_expand_int_addcc (operands)
10373 enum rtx_code code = GET_CODE (operands[1]);
10375 rtx val = const0_rtx;
10376 bool fpcmp = false;
10377 enum machine_mode mode = GET_MODE (operands[0]);
10379 if (operands[3] != const1_rtx
10380 && operands[3] != constm1_rtx)
10382 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10383 ix86_compare_op1, &compare_op))
10385 code = GET_CODE (compare_op);
10387 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10388 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10391 code = ix86_fp_compare_code_to_integer (code);
10398 PUT_CODE (compare_op,
10399 reverse_condition_maybe_unordered
10400 (GET_CODE (compare_op)));
10402 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10404 PUT_MODE (compare_op, mode);
10406 /* Construct either adc or sbb insn. */
10407 if ((code == LTU) == (operands[3] == constm1_rtx))
10409 switch (GET_MODE (operands[0]))
10412 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10415 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10418 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10421 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10429 switch (GET_MODE (operands[0]))
10432 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10435 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10438 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10441 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10447 return 1; /* DONE */
10451 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10452 works for floating pointer parameters and nonoffsetable memories.
10453 For pushes, it returns just stack offsets; the values will be saved
10454 in the right order. Maximally three parts are generated. */
10457 ix86_split_to_parts (operand, parts, mode)
10460 enum machine_mode mode;
10465 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
10467 size = (GET_MODE_SIZE (mode) + 4) / 8;
10469 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10471 if (size < 2 || size > 3)
10474 /* Optimize constant pool reference to immediates. This is used by fp
10475 moves, that force all constants to memory to allow combining. */
10476 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10478 rtx tmp = maybe_get_pool_constant (operand);
10483 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10485 /* The only non-offsetable memories we handle are pushes. */
10486 if (! push_operand (operand, VOIDmode))
10489 operand = copy_rtx (operand);
10490 PUT_MODE (operand, Pmode);
10491 parts[0] = parts[1] = parts[2] = operand;
10493 else if (!TARGET_64BIT)
10495 if (mode == DImode)
10496 split_di (&operand, 1, &parts[0], &parts[1]);
10499 if (REG_P (operand))
10501 if (!reload_completed)
10503 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10504 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10506 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10508 else if (offsettable_memref_p (operand))
10510 operand = adjust_address (operand, SImode, 0);
10511 parts[0] = operand;
10512 parts[1] = adjust_address (operand, SImode, 4);
10514 parts[2] = adjust_address (operand, SImode, 8);
10516 else if (GET_CODE (operand) == CONST_DOUBLE)
10521 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10526 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10527 parts[2] = gen_int_mode (l[2], SImode);
10530 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10535 parts[1] = gen_int_mode (l[1], SImode);
10536 parts[0] = gen_int_mode (l[0], SImode);
10544 if (mode == TImode)
10545 split_ti (&operand, 1, &parts[0], &parts[1]);
10546 if (mode == XFmode || mode == TFmode)
10548 if (REG_P (operand))
10550 if (!reload_completed)
10552 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10553 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10555 else if (offsettable_memref_p (operand))
10557 operand = adjust_address (operand, DImode, 0);
10558 parts[0] = operand;
10559 parts[1] = adjust_address (operand, SImode, 8);
10561 else if (GET_CODE (operand) == CONST_DOUBLE)
10566 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10567 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10568 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10569 if (HOST_BITS_PER_WIDE_INT >= 64)
10572 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10573 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10576 parts[0] = immed_double_const (l[0], l[1], DImode);
10577 parts[1] = gen_int_mode (l[2], SImode);
10587 /* Emit insns to perform a move or push of DI, DF, and XF values.
10588 Return false when normal moves are needed; true when all required
10589 insns have been emitted. Operands 2-4 contain the input values
10590 int the correct order; operands 5-7 contain the output values. */
10593 ix86_split_long_move (operands)
10599 int collisions = 0;
10600 enum machine_mode mode = GET_MODE (operands[0]);
10602 /* The DFmode expanders may ask us to move double.
10603 For 64bit target this is single move. By hiding the fact
10604 here we simplify i386.md splitters. */
10605 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10607 /* Optimize constant pool reference to immediates. This is used by
10608 fp moves, that force all constants to memory to allow combining. */
10610 if (GET_CODE (operands[1]) == MEM
10611 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10612 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10613 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10614 if (push_operand (operands[0], VOIDmode))
10616 operands[0] = copy_rtx (operands[0]);
10617 PUT_MODE (operands[0], Pmode);
10620 operands[0] = gen_lowpart (DImode, operands[0]);
10621 operands[1] = gen_lowpart (DImode, operands[1]);
10622 emit_move_insn (operands[0], operands[1]);
10626 /* The only non-offsettable memory we handle is push. */
10627 if (push_operand (operands[0], VOIDmode))
10629 else if (GET_CODE (operands[0]) == MEM
10630 && ! offsettable_memref_p (operands[0]))
10633 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10634 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10636 /* When emitting push, take care for source operands on the stack. */
10637 if (push && GET_CODE (operands[1]) == MEM
10638 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10641 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10642 XEXP (part[1][2], 0));
10643 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10644 XEXP (part[1][1], 0));
10647 /* We need to do copy in the right order in case an address register
10648 of the source overlaps the destination. */
10649 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10651 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10653 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10656 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10659 /* Collision in the middle part can be handled by reordering. */
10660 if (collisions == 1 && nparts == 3
10661 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10664 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10665 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10668 /* If there are more collisions, we can't handle it by reordering.
10669 Do an lea to the last part and use only one colliding move. */
10670 else if (collisions > 1)
10676 base = part[0][nparts - 1];
10678 /* Handle the case when the last part isn't valid for lea.
10679 Happens in 64-bit mode storing the 12-byte XFmode. */
10680 if (GET_MODE (base) != Pmode)
10681 base = gen_rtx_REG (Pmode, REGNO (base));
10683 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10684 part[1][0] = replace_equiv_address (part[1][0], base);
10685 part[1][1] = replace_equiv_address (part[1][1],
10686 plus_constant (base, UNITS_PER_WORD));
10688 part[1][2] = replace_equiv_address (part[1][2],
10689 plus_constant (base, 8));
10699 /* We use only first 12 bytes of TFmode value, but for pushing we
10700 are required to adjust stack as if we were pushing real 16byte
10702 if (mode == TFmode && !TARGET_64BIT)
10703 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
10705 emit_move_insn (part[0][2], part[1][2]);
10710 /* In 64bit mode we don't have 32bit push available. In case this is
10711 register, it is OK - we will just use larger counterpart. We also
10712 retype memory - these comes from attempt to avoid REX prefix on
10713 moving of second half of TFmode value. */
10714 if (GET_MODE (part[1][1]) == SImode)
10716 if (GET_CODE (part[1][1]) == MEM)
10717 part[1][1] = adjust_address (part[1][1], DImode, 0);
10718 else if (REG_P (part[1][1]))
10719 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10722 if (GET_MODE (part[1][0]) == SImode)
10723 part[1][0] = part[1][1];
10726 emit_move_insn (part[0][1], part[1][1]);
10727 emit_move_insn (part[0][0], part[1][0]);
10731 /* Choose correct order to not overwrite the source before it is copied. */
10732 if ((REG_P (part[0][0])
10733 && REG_P (part[1][1])
10734 && (REGNO (part[0][0]) == REGNO (part[1][1])
10736 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10738 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10742 operands[2] = part[0][2];
10743 operands[3] = part[0][1];
10744 operands[4] = part[0][0];
10745 operands[5] = part[1][2];
10746 operands[6] = part[1][1];
10747 operands[7] = part[1][0];
10751 operands[2] = part[0][1];
10752 operands[3] = part[0][0];
10753 operands[5] = part[1][1];
10754 operands[6] = part[1][0];
10761 operands[2] = part[0][0];
10762 operands[3] = part[0][1];
10763 operands[4] = part[0][2];
10764 operands[5] = part[1][0];
10765 operands[6] = part[1][1];
10766 operands[7] = part[1][2];
10770 operands[2] = part[0][0];
10771 operands[3] = part[0][1];
10772 operands[5] = part[1][0];
10773 operands[6] = part[1][1];
10776 emit_move_insn (operands[2], operands[5]);
10777 emit_move_insn (operands[3], operands[6]);
10779 emit_move_insn (operands[4], operands[7]);
10785 ix86_split_ashldi (operands, scratch)
10786 rtx *operands, scratch;
10788 rtx low[2], high[2];
10791 if (GET_CODE (operands[2]) == CONST_INT)
10793 split_di (operands, 2, low, high);
10794 count = INTVAL (operands[2]) & 63;
10798 emit_move_insn (high[0], low[1]);
10799 emit_move_insn (low[0], const0_rtx);
10802 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10806 if (!rtx_equal_p (operands[0], operands[1]))
10807 emit_move_insn (operands[0], operands[1]);
10808 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10809 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10814 if (!rtx_equal_p (operands[0], operands[1]))
10815 emit_move_insn (operands[0], operands[1]);
10817 split_di (operands, 1, low, high);
10819 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10820 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10822 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10824 if (! no_new_pseudos)
10825 scratch = force_reg (SImode, const0_rtx);
10827 emit_move_insn (scratch, const0_rtx);
10829 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10833 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10838 ix86_split_ashrdi (operands, scratch)
10839 rtx *operands, scratch;
10841 rtx low[2], high[2];
10844 if (GET_CODE (operands[2]) == CONST_INT)
10846 split_di (operands, 2, low, high);
10847 count = INTVAL (operands[2]) & 63;
10851 emit_move_insn (low[0], high[1]);
10853 if (! reload_completed)
10854 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10857 emit_move_insn (high[0], low[0]);
10858 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10862 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10866 if (!rtx_equal_p (operands[0], operands[1]))
10867 emit_move_insn (operands[0], operands[1]);
10868 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10869 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10874 if (!rtx_equal_p (operands[0], operands[1]))
10875 emit_move_insn (operands[0], operands[1]);
10877 split_di (operands, 1, low, high);
10879 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10880 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10882 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10884 if (! no_new_pseudos)
10885 scratch = gen_reg_rtx (SImode);
10886 emit_move_insn (scratch, high[0]);
10887 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10888 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10892 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10897 ix86_split_lshrdi (operands, scratch)
10898 rtx *operands, scratch;
10900 rtx low[2], high[2];
10903 if (GET_CODE (operands[2]) == CONST_INT)
10905 split_di (operands, 2, low, high);
10906 count = INTVAL (operands[2]) & 63;
10910 emit_move_insn (low[0], high[1]);
10911 emit_move_insn (high[0], const0_rtx);
10914 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10918 if (!rtx_equal_p (operands[0], operands[1]))
10919 emit_move_insn (operands[0], operands[1]);
10920 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10921 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10926 if (!rtx_equal_p (operands[0], operands[1]))
10927 emit_move_insn (operands[0], operands[1]);
10929 split_di (operands, 1, low, high);
10931 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10932 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10934 /* Heh. By reversing the arguments, we can reuse this pattern. */
10935 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10937 if (! no_new_pseudos)
10938 scratch = force_reg (SImode, const0_rtx);
10940 emit_move_insn (scratch, const0_rtx);
10942 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10946 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10950 /* Helper function for the string operations below. Dest VARIABLE whether
10951 it is aligned to VALUE bytes. If true, jump to the label. */
10953 ix86_expand_aligntest (variable, value)
10957 rtx label = gen_label_rtx ();
10958 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10959 if (GET_MODE (variable) == DImode)
10960 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10962 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10963 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10968 /* Adjust COUNTER by the VALUE. */
10970 ix86_adjust_counter (countreg, value)
10972 HOST_WIDE_INT value;
10974 if (GET_MODE (countreg) == DImode)
10975 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10977 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10980 /* Zero extend possibly SImode EXP to Pmode register. */
10982 ix86_zero_extend_to_Pmode (exp)
10986 if (GET_MODE (exp) == VOIDmode)
10987 return force_reg (Pmode, exp);
10988 if (GET_MODE (exp) == Pmode)
10989 return copy_to_mode_reg (Pmode, exp);
10990 r = gen_reg_rtx (Pmode);
10991 emit_insn (gen_zero_extendsidi2 (r, exp));
10995 /* Expand string move (memcpy) operation. Use i386 string operations when
10996 profitable. expand_clrstr contains similar code. */
10998 ix86_expand_movstr (dst, src, count_exp, align_exp)
10999 rtx dst, src, count_exp, align_exp;
11001 rtx srcreg, destreg, countreg;
11002 enum machine_mode counter_mode;
11003 HOST_WIDE_INT align = 0;
11004 unsigned HOST_WIDE_INT count = 0;
11007 if (GET_CODE (align_exp) == CONST_INT)
11008 align = INTVAL (align_exp);
11010 /* Can't use any of this if the user has appropriated esi or edi. */
11011 if (global_regs[4] || global_regs[5])
11014 /* This simple hack avoids all inlining code and simplifies code below. */
11015 if (!TARGET_ALIGN_STRINGOPS)
11018 if (GET_CODE (count_exp) == CONST_INT)
11020 count = INTVAL (count_exp);
11021 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11025 /* Figure out proper mode for counter. For 32bits it is always SImode,
11026 for 64bits use SImode when possible, otherwise DImode.
11027 Set count to number of bytes copied when known at compile time. */
11028 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11029 || x86_64_zero_extended_value (count_exp))
11030 counter_mode = SImode;
11032 counter_mode = DImode;
11036 if (counter_mode != SImode && counter_mode != DImode)
11039 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11040 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11042 emit_insn (gen_cld ());
11044 /* When optimizing for size emit simple rep ; movsb instruction for
11045 counts not divisible by 4. */
11047 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11049 countreg = ix86_zero_extend_to_Pmode (count_exp);
11051 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
11052 destreg, srcreg, countreg));
11054 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
11055 destreg, srcreg, countreg));
11058 /* For constant aligned (or small unaligned) copies use rep movsl
11059 followed by code copying the rest. For PentiumPro ensure 8 byte
11060 alignment to allow rep movsl acceleration. */
11062 else if (count != 0
11064 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11065 || optimize_size || count < (unsigned int) 64))
11067 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11068 if (count & ~(size - 1))
11070 countreg = copy_to_mode_reg (counter_mode,
11071 GEN_INT ((count >> (size == 4 ? 2 : 3))
11072 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11073 countreg = ix86_zero_extend_to_Pmode (countreg);
11077 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
11078 destreg, srcreg, countreg));
11080 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
11081 destreg, srcreg, countreg));
11084 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
11085 destreg, srcreg, countreg));
11087 if (size == 8 && (count & 0x04))
11088 emit_insn (gen_strmovsi (destreg, srcreg));
11090 emit_insn (gen_strmovhi (destreg, srcreg));
11092 emit_insn (gen_strmovqi (destreg, srcreg));
11094 /* The generic code based on the glibc implementation:
11095 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
11096 allowing accelerated copying there)
11097 - copy the data using rep movsl
11098 - copy the rest. */
11103 int desired_alignment = (TARGET_PENTIUMPRO
11104 && (count == 0 || count >= (unsigned int) 260)
11105 ? 8 : UNITS_PER_WORD);
11107 /* In case we don't know anything about the alignment, default to
11108 library version, since it is usually equally fast and result in
11111 Also emit call when we know that the count is large and call overhead
11112 will not be important. */
11113 if (!TARGET_INLINE_ALL_STRINGOPS
11114 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11120 if (TARGET_SINGLE_STRINGOP)
11121 emit_insn (gen_cld ());
11123 countreg2 = gen_reg_rtx (Pmode);
11124 countreg = copy_to_mode_reg (counter_mode, count_exp);
11126 /* We don't use loops to align destination and to copy parts smaller
11127 than 4 bytes, because gcc is able to optimize such code better (in
11128 the case the destination or the count really is aligned, gcc is often
11129 able to predict the branches) and also it is friendlier to the
11130 hardware branch prediction.
11132 Using loops is beneficial for generic case, because we can
11133 handle small counts using the loops. Many CPUs (such as Athlon)
11134 have large REP prefix setup costs.
11136 This is quite costly. Maybe we can revisit this decision later or
11137 add some customizability to this code. */
11139 if (count == 0 && align < desired_alignment)
11141 label = gen_label_rtx ();
11142 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11143 LEU, 0, counter_mode, 1, label);
11147 rtx label = ix86_expand_aligntest (destreg, 1);
11148 emit_insn (gen_strmovqi (destreg, srcreg));
11149 ix86_adjust_counter (countreg, 1);
11150 emit_label (label);
11151 LABEL_NUSES (label) = 1;
11155 rtx label = ix86_expand_aligntest (destreg, 2);
11156 emit_insn (gen_strmovhi (destreg, srcreg));
11157 ix86_adjust_counter (countreg, 2);
11158 emit_label (label);
11159 LABEL_NUSES (label) = 1;
11161 if (align <= 4 && desired_alignment > 4)
11163 rtx label = ix86_expand_aligntest (destreg, 4);
11164 emit_insn (gen_strmovsi (destreg, srcreg));
11165 ix86_adjust_counter (countreg, 4);
11166 emit_label (label);
11167 LABEL_NUSES (label) = 1;
11170 if (label && desired_alignment > 4 && !TARGET_64BIT)
11172 emit_label (label);
11173 LABEL_NUSES (label) = 1;
11176 if (!TARGET_SINGLE_STRINGOP)
11177 emit_insn (gen_cld ());
11180 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11182 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
11183 destreg, srcreg, countreg2));
11187 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
11188 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
11189 destreg, srcreg, countreg2));
11194 emit_label (label);
11195 LABEL_NUSES (label) = 1;
11197 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11198 emit_insn (gen_strmovsi (destreg, srcreg));
11199 if ((align <= 4 || count == 0) && TARGET_64BIT)
11201 rtx label = ix86_expand_aligntest (countreg, 4);
11202 emit_insn (gen_strmovsi (destreg, srcreg));
11203 emit_label (label);
11204 LABEL_NUSES (label) = 1;
11206 if (align > 2 && count != 0 && (count & 2))
11207 emit_insn (gen_strmovhi (destreg, srcreg));
11208 if (align <= 2 || count == 0)
11210 rtx label = ix86_expand_aligntest (countreg, 2);
11211 emit_insn (gen_strmovhi (destreg, srcreg));
11212 emit_label (label);
11213 LABEL_NUSES (label) = 1;
11215 if (align > 1 && count != 0 && (count & 1))
11216 emit_insn (gen_strmovqi (destreg, srcreg));
11217 if (align <= 1 || count == 0)
11219 rtx label = ix86_expand_aligntest (countreg, 1);
11220 emit_insn (gen_strmovqi (destreg, srcreg));
11221 emit_label (label);
11222 LABEL_NUSES (label) = 1;
11226 insns = get_insns ();
11229 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
11234 /* Expand string clear operation (bzero). Use i386 string operations when
11235 profitable. expand_movstr contains similar code. */
11237 ix86_expand_clrstr (src, count_exp, align_exp)
11238 rtx src, count_exp, align_exp;
11240 rtx destreg, zeroreg, countreg;
11241 enum machine_mode counter_mode;
11242 HOST_WIDE_INT align = 0;
11243 unsigned HOST_WIDE_INT count = 0;
11245 if (GET_CODE (align_exp) == CONST_INT)
11246 align = INTVAL (align_exp);
11248 /* Can't use any of this if the user has appropriated esi. */
11249 if (global_regs[4])
11252 /* This simple hack avoids all inlining code and simplifies code below. */
11253 if (!TARGET_ALIGN_STRINGOPS)
11256 if (GET_CODE (count_exp) == CONST_INT)
11258 count = INTVAL (count_exp);
11259 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11262 /* Figure out proper mode for counter. For 32bits it is always SImode,
11263 for 64bits use SImode when possible, otherwise DImode.
11264 Set count to number of bytes copied when known at compile time. */
11265 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11266 || x86_64_zero_extended_value (count_exp))
11267 counter_mode = SImode;
11269 counter_mode = DImode;
11271 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11273 emit_insn (gen_cld ());
11275 /* When optimizing for size emit simple rep ; movsb instruction for
11276 counts not divisible by 4. */
11278 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11280 countreg = ix86_zero_extend_to_Pmode (count_exp);
11281 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11283 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
11284 destreg, countreg));
11286 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
11287 destreg, countreg));
11289 else if (count != 0
11291 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11292 || optimize_size || count < (unsigned int) 64))
11294 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11295 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11296 if (count & ~(size - 1))
11298 countreg = copy_to_mode_reg (counter_mode,
11299 GEN_INT ((count >> (size == 4 ? 2 : 3))
11300 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11301 countreg = ix86_zero_extend_to_Pmode (countreg);
11305 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
11306 destreg, countreg));
11308 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
11309 destreg, countreg));
11312 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
11313 destreg, countreg));
11315 if (size == 8 && (count & 0x04))
11316 emit_insn (gen_strsetsi (destreg,
11317 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11319 emit_insn (gen_strsethi (destreg,
11320 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11322 emit_insn (gen_strsetqi (destreg,
11323 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11329 /* Compute desired alignment of the string operation. */
11330 int desired_alignment = (TARGET_PENTIUMPRO
11331 && (count == 0 || count >= (unsigned int) 260)
11332 ? 8 : UNITS_PER_WORD);
11334 /* In case we don't know anything about the alignment, default to
11335 library version, since it is usually equally fast and result in
11338 Also emit call when we know that the count is large and call overhead
11339 will not be important. */
11340 if (!TARGET_INLINE_ALL_STRINGOPS
11341 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11344 if (TARGET_SINGLE_STRINGOP)
11345 emit_insn (gen_cld ());
11347 countreg2 = gen_reg_rtx (Pmode);
11348 countreg = copy_to_mode_reg (counter_mode, count_exp);
11349 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11351 if (count == 0 && align < desired_alignment)
11353 label = gen_label_rtx ();
11354 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11355 LEU, 0, counter_mode, 1, label);
11359 rtx label = ix86_expand_aligntest (destreg, 1);
11360 emit_insn (gen_strsetqi (destreg,
11361 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11362 ix86_adjust_counter (countreg, 1);
11363 emit_label (label);
11364 LABEL_NUSES (label) = 1;
11368 rtx label = ix86_expand_aligntest (destreg, 2);
11369 emit_insn (gen_strsethi (destreg,
11370 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11371 ix86_adjust_counter (countreg, 2);
11372 emit_label (label);
11373 LABEL_NUSES (label) = 1;
11375 if (align <= 4 && desired_alignment > 4)
11377 rtx label = ix86_expand_aligntest (destreg, 4);
11378 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
11379 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11381 ix86_adjust_counter (countreg, 4);
11382 emit_label (label);
11383 LABEL_NUSES (label) = 1;
11386 if (label && desired_alignment > 4 && !TARGET_64BIT)
11388 emit_label (label);
11389 LABEL_NUSES (label) = 1;
11393 if (!TARGET_SINGLE_STRINGOP)
11394 emit_insn (gen_cld ());
11397 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11399 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
11400 destreg, countreg2));
11404 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
11405 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
11406 destreg, countreg2));
11410 emit_label (label);
11411 LABEL_NUSES (label) = 1;
11414 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11415 emit_insn (gen_strsetsi (destreg,
11416 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11417 if (TARGET_64BIT && (align <= 4 || count == 0))
11419 rtx label = ix86_expand_aligntest (countreg, 4);
11420 emit_insn (gen_strsetsi (destreg,
11421 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11422 emit_label (label);
11423 LABEL_NUSES (label) = 1;
11425 if (align > 2 && count != 0 && (count & 2))
11426 emit_insn (gen_strsethi (destreg,
11427 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11428 if (align <= 2 || count == 0)
11430 rtx label = ix86_expand_aligntest (countreg, 2);
11431 emit_insn (gen_strsethi (destreg,
11432 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11433 emit_label (label);
11434 LABEL_NUSES (label) = 1;
11436 if (align > 1 && count != 0 && (count & 1))
11437 emit_insn (gen_strsetqi (destreg,
11438 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11439 if (align <= 1 || count == 0)
11441 rtx label = ix86_expand_aligntest (countreg, 1);
11442 emit_insn (gen_strsetqi (destreg,
11443 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11444 emit_label (label);
11445 LABEL_NUSES (label) = 1;
11450 /* Expand strlen. */
11452 ix86_expand_strlen (out, src, eoschar, align)
11453 rtx out, src, eoschar, align;
11455 rtx addr, scratch1, scratch2, scratch3, scratch4;
11457 /* The generic case of strlen expander is long. Avoid it's
11458 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
11460 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11461 && !TARGET_INLINE_ALL_STRINGOPS
11463 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11466 addr = force_reg (Pmode, XEXP (src, 0));
11467 scratch1 = gen_reg_rtx (Pmode);
11469 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11472 /* Well it seems that some optimizer does not combine a call like
11473 foo(strlen(bar), strlen(bar));
11474 when the move and the subtraction is done here. It does calculate
11475 the length just once when these instructions are done inside of
11476 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
11477 often used and I use one fewer register for the lifetime of
11478 output_strlen_unroll() this is better. */
11480 emit_move_insn (out, addr);
11482 ix86_expand_strlensi_unroll_1 (out, align);
11484 /* strlensi_unroll_1 returns the address of the zero at the end of
11485 the string, like memchr(), so compute the length by subtracting
11486 the start address. */
11488 emit_insn (gen_subdi3 (out, out, addr));
11490 emit_insn (gen_subsi3 (out, out, addr));
11494 scratch2 = gen_reg_rtx (Pmode);
11495 scratch3 = gen_reg_rtx (Pmode);
11496 scratch4 = force_reg (Pmode, constm1_rtx);
11498 emit_move_insn (scratch3, addr);
11499 eoschar = force_reg (QImode, eoschar);
11501 emit_insn (gen_cld ());
11504 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
11505 align, scratch4, scratch3));
11506 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11507 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11511 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
11512 align, scratch4, scratch3));
11513 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11514 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11520 /* Expand the appropriate insns for doing strlen if not just doing
11523 out = result, initialized with the start address
11524 align_rtx = alignment of the address.
11525 scratch = scratch register, initialized with the startaddress when
11526 not aligned, otherwise undefined
11528 This is just the body. It needs the initialisations mentioned above and
11529 some address computing at the end. These things are done in i386.md. */
11532 ix86_expand_strlensi_unroll_1 (out, align_rtx)
11533 rtx out, align_rtx;
11537 rtx align_2_label = NULL_RTX;
11538 rtx align_3_label = NULL_RTX;
11539 rtx align_4_label = gen_label_rtx ();
11540 rtx end_0_label = gen_label_rtx ();
11542 rtx tmpreg = gen_reg_rtx (SImode);
11543 rtx scratch = gen_reg_rtx (SImode);
11547 if (GET_CODE (align_rtx) == CONST_INT)
11548 align = INTVAL (align_rtx);
11550 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11552 /* Is there a known alignment and is it less than 4? */
11555 rtx scratch1 = gen_reg_rtx (Pmode);
11556 emit_move_insn (scratch1, out);
11557 /* Is there a known alignment and is it not 2? */
11560 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11561 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11563 /* Leave just the 3 lower bits. */
11564 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11565 NULL_RTX, 0, OPTAB_WIDEN);
11567 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11568 Pmode, 1, align_4_label);
11569 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
11570 Pmode, 1, align_2_label);
11571 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
11572 Pmode, 1, align_3_label);
11576 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11577 check if is aligned to 4 - byte. */
11579 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
11580 NULL_RTX, 0, OPTAB_WIDEN);
11582 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11583 Pmode, 1, align_4_label);
11586 mem = gen_rtx_MEM (QImode, out);
11588 /* Now compare the bytes. */
11590 /* Compare the first n unaligned byte on a byte per byte basis. */
11591 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11592 QImode, 1, end_0_label);
11594 /* Increment the address. */
11596 emit_insn (gen_adddi3 (out, out, const1_rtx));
11598 emit_insn (gen_addsi3 (out, out, const1_rtx));
11600 /* Not needed with an alignment of 2 */
11603 emit_label (align_2_label);
11605 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11609 emit_insn (gen_adddi3 (out, out, const1_rtx));
11611 emit_insn (gen_addsi3 (out, out, const1_rtx));
11613 emit_label (align_3_label);
11616 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11620 emit_insn (gen_adddi3 (out, out, const1_rtx));
11622 emit_insn (gen_addsi3 (out, out, const1_rtx));
11625 /* Generate loop to check 4 bytes at a time. It is not a good idea to
11626 align this loop. It gives only huge programs, but does not help to
11628 emit_label (align_4_label);
11630 mem = gen_rtx_MEM (SImode, out);
11631 emit_move_insn (scratch, mem);
11633 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11635 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11637 /* This formula yields a nonzero result iff one of the bytes is zero.
11638 This saves three branches inside loop and many cycles. */
11640 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11641 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11642 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11643 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11644 gen_int_mode (0x80808080, SImode)));
11645 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11650 rtx reg = gen_reg_rtx (SImode);
11651 rtx reg2 = gen_reg_rtx (Pmode);
11652 emit_move_insn (reg, tmpreg);
11653 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11655 /* If zero is not in the first two bytes, move two bytes forward. */
11656 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11657 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11658 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11659 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11660 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11663 /* Emit lea manually to avoid clobbering of flags. */
11664 emit_insn (gen_rtx_SET (SImode, reg2,
11665 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
11667 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11668 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11669 emit_insn (gen_rtx_SET (VOIDmode, out,
11670 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11677 rtx end_2_label = gen_label_rtx ();
11678 /* Is zero in the first two bytes? */
11680 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11681 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11682 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11683 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11684 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11686 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11687 JUMP_LABEL (tmp) = end_2_label;
11689 /* Not in the first two. Move two bytes forward. */
11690 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11692 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
11694 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
11696 emit_label (end_2_label);
11700 /* Avoid branch in fixing the byte. */
11701 tmpreg = gen_lowpart (QImode, tmpreg);
11702 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11703 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11705 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11707 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
11709 emit_label (end_0_label);
11713 ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop, sibcall)
11714 rtx retval, fnaddr, callarg1, callarg2, pop;
11717 rtx use = NULL, call;
11719 if (pop == const0_rtx)
11721 if (TARGET_64BIT && pop)
11725 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11726 fnaddr = machopic_indirect_call_target (fnaddr);
11728 /* Static functions and indirect calls don't need the pic register. */
11729 if (! TARGET_64BIT && flag_pic
11730 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11731 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
11732 use_reg (&use, pic_offset_table_rtx);
11734 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11736 rtx al = gen_rtx_REG (QImode, 0);
11737 emit_move_insn (al, callarg2);
11738 use_reg (&use, al);
11740 #endif /* TARGET_MACHO */
11742 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11744 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11745 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11747 if (sibcall && TARGET_64BIT
11748 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11751 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11752 fnaddr = gen_rtx_REG (Pmode, 40);
11753 emit_move_insn (fnaddr, addr);
11754 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11757 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11759 call = gen_rtx_SET (VOIDmode, retval, call);
11762 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11763 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11764 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11767 call = emit_call_insn (call);
11769 CALL_INSN_FUNCTION_USAGE (call) = use;
11773 /* Clear stack slot assignments remembered from previous functions.
11774 This is called from INIT_EXPANDERS once before RTL is emitted for each
11777 static struct machine_function *
11778 ix86_init_machine_status ()
11780 struct machine_function *f;
11782 f = ggc_alloc_cleared (sizeof (struct machine_function));
11783 f->use_fast_prologue_epilogue_nregs = -1;
11788 /* Return a MEM corresponding to a stack slot with mode MODE.
11789 Allocate a new slot if necessary.
11791 The RTL for a function can have several slots available: N is
11792 which slot to use. */
11795 assign_386_stack_local (mode, n)
11796 enum machine_mode mode;
11799 struct stack_local_entry *s;
11801 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11804 for (s = ix86_stack_locals; s; s = s->next)
11805 if (s->mode == mode && s->n == n)
11808 s = (struct stack_local_entry *)
11809 ggc_alloc (sizeof (struct stack_local_entry));
11812 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11814 s->next = ix86_stack_locals;
11815 ix86_stack_locals = s;
11819 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11821 static GTY(()) rtx ix86_tls_symbol;
11823 ix86_tls_get_addr ()
11826 if (!ix86_tls_symbol)
11828 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11829 (TARGET_GNU_TLS && !TARGET_64BIT)
11830 ? "___tls_get_addr"
11831 : "__tls_get_addr");
11834 return ix86_tls_symbol;
11837 /* Calculate the length of the memory address in the instruction
11838 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11841 memory_address_length (addr)
11844 struct ix86_address parts;
11845 rtx base, index, disp;
11848 if (GET_CODE (addr) == PRE_DEC
11849 || GET_CODE (addr) == POST_INC
11850 || GET_CODE (addr) == PRE_MODIFY
11851 || GET_CODE (addr) == POST_MODIFY)
11854 if (! ix86_decompose_address (addr, &parts))
11858 index = parts.index;
11862 /* Register Indirect. */
11863 if (base && !index && !disp)
11865 /* Special cases: ebp and esp need the two-byte modrm form. */
11866 if (addr == stack_pointer_rtx
11867 || addr == arg_pointer_rtx
11868 || addr == frame_pointer_rtx
11869 || addr == hard_frame_pointer_rtx)
11873 /* Direct Addressing. */
11874 else if (disp && !base && !index)
11879 /* Find the length of the displacement constant. */
11882 if (GET_CODE (disp) == CONST_INT
11883 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
11890 /* An index requires the two-byte modrm form. */
11898 /* Compute default value for "length_immediate" attribute. When SHORTFORM
11899 is set, expect that insn have 8bit immediate alternative. */
11901 ix86_attr_length_immediate_default (insn, shortform)
11907 extract_insn_cached (insn);
11908 for (i = recog_data.n_operands - 1; i >= 0; --i)
11909 if (CONSTANT_P (recog_data.operand[i]))
11914 && GET_CODE (recog_data.operand[i]) == CONST_INT
11915 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11919 switch (get_attr_mode (insn))
11930 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
11935 fatal_insn ("unknown insn mode", insn);
11941 /* Compute default value for "length_address" attribute. */
11943 ix86_attr_length_address_default (insn)
11948 if (get_attr_type (insn) == TYPE_LEA)
11950 rtx set = PATTERN (insn);
11951 if (GET_CODE (set) == SET)
11953 else if (GET_CODE (set) == PARALLEL
11954 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
11955 set = XVECEXP (set, 0, 0);
11958 #ifdef ENABLE_CHECKING
11964 return memory_address_length (SET_SRC (set));
11967 extract_insn_cached (insn);
11968 for (i = recog_data.n_operands - 1; i >= 0; --i)
11969 if (GET_CODE (recog_data.operand[i]) == MEM)
11971 return memory_address_length (XEXP (recog_data.operand[i], 0));
11977 /* Return the maximum number of instructions a cpu can issue. */
11984 case PROCESSOR_PENTIUM:
11988 case PROCESSOR_PENTIUMPRO:
11989 case PROCESSOR_PENTIUM4:
11990 case PROCESSOR_ATHLON:
11999 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
12000 by DEP_INSN and nothing set by DEP_INSN. */
12003 ix86_flags_dependant (insn, dep_insn, insn_type)
12004 rtx insn, dep_insn;
12005 enum attr_type insn_type;
12009 /* Simplify the test for uninteresting insns. */
12010 if (insn_type != TYPE_SETCC
12011 && insn_type != TYPE_ICMOV
12012 && insn_type != TYPE_FCMOV
12013 && insn_type != TYPE_IBR)
12016 if ((set = single_set (dep_insn)) != 0)
12018 set = SET_DEST (set);
12021 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
12022 && XVECLEN (PATTERN (dep_insn), 0) == 2
12023 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
12024 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
12026 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12027 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12032 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
12035 /* This test is true if the dependent insn reads the flags but
12036 not any other potentially set register. */
12037 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
12040 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
12046 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
12047 address with operands set by DEP_INSN. */
12050 ix86_agi_dependant (insn, dep_insn, insn_type)
12051 rtx insn, dep_insn;
12052 enum attr_type insn_type;
12056 if (insn_type == TYPE_LEA
12059 addr = PATTERN (insn);
12060 if (GET_CODE (addr) == SET)
12062 else if (GET_CODE (addr) == PARALLEL
12063 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
12064 addr = XVECEXP (addr, 0, 0);
12067 addr = SET_SRC (addr);
12072 extract_insn_cached (insn);
12073 for (i = recog_data.n_operands - 1; i >= 0; --i)
12074 if (GET_CODE (recog_data.operand[i]) == MEM)
12076 addr = XEXP (recog_data.operand[i], 0);
12083 return modified_in_p (addr, dep_insn);
12087 ix86_adjust_cost (insn, link, dep_insn, cost)
12088 rtx insn, link, dep_insn;
12091 enum attr_type insn_type, dep_insn_type;
12092 enum attr_memory memory, dep_memory;
12094 int dep_insn_code_number;
12096 /* Anti and output dependencies have zero cost on all CPUs. */
12097 if (REG_NOTE_KIND (link) != 0)
12100 dep_insn_code_number = recog_memoized (dep_insn);
12102 /* If we can't recognize the insns, we can't really do anything. */
12103 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
12106 insn_type = get_attr_type (insn);
12107 dep_insn_type = get_attr_type (dep_insn);
12111 case PROCESSOR_PENTIUM:
12112 /* Address Generation Interlock adds a cycle of latency. */
12113 if (ix86_agi_dependant (insn, dep_insn, insn_type))
12116 /* ??? Compares pair with jump/setcc. */
12117 if (ix86_flags_dependant (insn, dep_insn, insn_type))
12120 /* Floating point stores require value to be ready one cycle earlier. */
12121 if (insn_type == TYPE_FMOV
12122 && get_attr_memory (insn) == MEMORY_STORE
12123 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12127 case PROCESSOR_PENTIUMPRO:
12128 memory = get_attr_memory (insn);
12129 dep_memory = get_attr_memory (dep_insn);
12131 /* Since we can't represent delayed latencies of load+operation,
12132 increase the cost here for non-imov insns. */
12133 if (dep_insn_type != TYPE_IMOV
12134 && dep_insn_type != TYPE_FMOV
12135 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
12138 /* INT->FP conversion is expensive. */
12139 if (get_attr_fp_int_src (dep_insn))
12142 /* There is one cycle extra latency between an FP op and a store. */
12143 if (insn_type == TYPE_FMOV
12144 && (set = single_set (dep_insn)) != NULL_RTX
12145 && (set2 = single_set (insn)) != NULL_RTX
12146 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
12147 && GET_CODE (SET_DEST (set2)) == MEM)
12150 /* Show ability of reorder buffer to hide latency of load by executing
12151 in parallel with previous instruction in case
12152 previous instruction is not needed to compute the address. */
12153 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12154 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12156 /* Claim moves to take one cycle, as core can issue one load
12157 at time and the next load can start cycle later. */
12158 if (dep_insn_type == TYPE_IMOV
12159 || dep_insn_type == TYPE_FMOV)
12167 memory = get_attr_memory (insn);
12168 dep_memory = get_attr_memory (dep_insn);
12169 /* The esp dependency is resolved before the instruction is really
12171 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
12172 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
12175 /* Since we can't represent delayed latencies of load+operation,
12176 increase the cost here for non-imov insns. */
12177 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
12178 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
12180 /* INT->FP conversion is expensive. */
12181 if (get_attr_fp_int_src (dep_insn))
12184 /* Show ability of reorder buffer to hide latency of load by executing
12185 in parallel with previous instruction in case
12186 previous instruction is not needed to compute the address. */
12187 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12188 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12190 /* Claim moves to take one cycle, as core can issue one load
12191 at time and the next load can start cycle later. */
12192 if (dep_insn_type == TYPE_IMOV
12193 || dep_insn_type == TYPE_FMOV)
12202 case PROCESSOR_ATHLON:
12204 memory = get_attr_memory (insn);
12205 dep_memory = get_attr_memory (dep_insn);
12207 /* Show ability of reorder buffer to hide latency of load by executing
12208 in parallel with previous instruction in case
12209 previous instruction is not needed to compute the address. */
12210 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12211 && !ix86_agi_dependant (insn, dep_insn, insn_type))
12213 enum attr_unit unit = get_attr_unit (insn);
12216 /* Because of the difference between the length of integer and
12217 floating unit pipeline preparation stages, the memory operands
12218 for floating point are cheaper.
12220 ??? For Athlon it the difference is most propbably 2. */
12221 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
12224 loadcost = TARGET_ATHLON ? 2 : 0;
12226 if (cost >= loadcost)
12241 struct ppro_sched_data
12244 int issued_this_cycle;
12248 static enum attr_ppro_uops
12249 ix86_safe_ppro_uops (insn)
12252 if (recog_memoized (insn) >= 0)
12253 return get_attr_ppro_uops (insn);
12255 return PPRO_UOPS_MANY;
12259 ix86_dump_ppro_packet (dump)
12262 if (ix86_sched_data.ppro.decode[0])
12264 fprintf (dump, "PPRO packet: %d",
12265 INSN_UID (ix86_sched_data.ppro.decode[0]));
12266 if (ix86_sched_data.ppro.decode[1])
12267 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
12268 if (ix86_sched_data.ppro.decode[2])
12269 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
12270 fputc ('\n', dump);
12274 /* We're beginning a new block. Initialize data structures as necessary. */
12277 ix86_sched_init (dump, sched_verbose, veclen)
12278 FILE *dump ATTRIBUTE_UNUSED;
12279 int sched_verbose ATTRIBUTE_UNUSED;
12280 int veclen ATTRIBUTE_UNUSED;
12282 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
12285 /* Shift INSN to SLOT, and shift everything else down. */
12288 ix86_reorder_insn (insnp, slot)
12295 insnp[0] = insnp[1];
12296 while (++insnp != slot);
12302 ix86_sched_reorder_ppro (ready, e_ready)
12307 enum attr_ppro_uops cur_uops;
12308 int issued_this_cycle;
12312 /* At this point .ppro.decode contains the state of the three
12313 decoders from last "cycle". That is, those insns that were
12314 actually independent. But here we're scheduling for the
12315 decoder, and we may find things that are decodable in the
12318 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
12319 issued_this_cycle = 0;
12322 cur_uops = ix86_safe_ppro_uops (*insnp);
12324 /* If the decoders are empty, and we've a complex insn at the
12325 head of the priority queue, let it issue without complaint. */
12326 if (decode[0] == NULL)
12328 if (cur_uops == PPRO_UOPS_MANY)
12330 decode[0] = *insnp;
12334 /* Otherwise, search for a 2-4 uop unsn to issue. */
12335 while (cur_uops != PPRO_UOPS_FEW)
12337 if (insnp == ready)
12339 cur_uops = ix86_safe_ppro_uops (*--insnp);
12342 /* If so, move it to the head of the line. */
12343 if (cur_uops == PPRO_UOPS_FEW)
12344 ix86_reorder_insn (insnp, e_ready);
12346 /* Issue the head of the queue. */
12347 issued_this_cycle = 1;
12348 decode[0] = *e_ready--;
12351 /* Look for simple insns to fill in the other two slots. */
12352 for (i = 1; i < 3; ++i)
12353 if (decode[i] == NULL)
12355 if (ready > e_ready)
12359 cur_uops = ix86_safe_ppro_uops (*insnp);
12360 while (cur_uops != PPRO_UOPS_ONE)
12362 if (insnp == ready)
12364 cur_uops = ix86_safe_ppro_uops (*--insnp);
12367 /* Found one. Move it to the head of the queue and issue it. */
12368 if (cur_uops == PPRO_UOPS_ONE)
12370 ix86_reorder_insn (insnp, e_ready);
12371 decode[i] = *e_ready--;
12372 issued_this_cycle++;
12376 /* ??? Didn't find one. Ideally, here we would do a lazy split
12377 of 2-uop insns, issue one and queue the other. */
12381 if (issued_this_cycle == 0)
12382 issued_this_cycle = 1;
12383 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
12386 /* We are about to being issuing insns for this clock cycle.
12387 Override the default sort algorithm to better slot instructions. */
12389 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
12390 FILE *dump ATTRIBUTE_UNUSED;
12391 int sched_verbose ATTRIBUTE_UNUSED;
12394 int clock_var ATTRIBUTE_UNUSED;
12396 int n_ready = *n_readyp;
12397 rtx *e_ready = ready + n_ready - 1;
12399 /* Make sure to go ahead and initialize key items in
12400 ix86_sched_data if we are not going to bother trying to
12401 reorder the ready queue. */
12404 ix86_sched_data.ppro.issued_this_cycle = 1;
12413 case PROCESSOR_PENTIUMPRO:
12414 ix86_sched_reorder_ppro (ready, e_ready);
12419 return ix86_issue_rate ();
12422 /* We are about to issue INSN. Return the number of insns left on the
12423 ready queue that can be issued this cycle. */
12426 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
12430 int can_issue_more;
12436 return can_issue_more - 1;
12438 case PROCESSOR_PENTIUMPRO:
12440 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
12442 if (uops == PPRO_UOPS_MANY)
12445 ix86_dump_ppro_packet (dump);
12446 ix86_sched_data.ppro.decode[0] = insn;
12447 ix86_sched_data.ppro.decode[1] = NULL;
12448 ix86_sched_data.ppro.decode[2] = NULL;
12450 ix86_dump_ppro_packet (dump);
12451 ix86_sched_data.ppro.decode[0] = NULL;
12453 else if (uops == PPRO_UOPS_FEW)
12456 ix86_dump_ppro_packet (dump);
12457 ix86_sched_data.ppro.decode[0] = insn;
12458 ix86_sched_data.ppro.decode[1] = NULL;
12459 ix86_sched_data.ppro.decode[2] = NULL;
12463 for (i = 0; i < 3; ++i)
12464 if (ix86_sched_data.ppro.decode[i] == NULL)
12466 ix86_sched_data.ppro.decode[i] = insn;
12474 ix86_dump_ppro_packet (dump);
12475 ix86_sched_data.ppro.decode[0] = NULL;
12476 ix86_sched_data.ppro.decode[1] = NULL;
12477 ix86_sched_data.ppro.decode[2] = NULL;
12481 return --ix86_sched_data.ppro.issued_this_cycle;
12486 ia32_use_dfa_pipeline_interface ()
12488 if (TARGET_PENTIUM || TARGET_ATHLON_K8)
12493 /* How many alternative schedules to try. This should be as wide as the
12494 scheduling freedom in the DFA, but no wider. Making this value too
12495 large results extra work for the scheduler. */
12498 ia32_multipass_dfa_lookahead ()
12500 if (ix86_tune == PROCESSOR_PENTIUM)
12507 /* Walk through INSNS and look for MEM references whose address is DSTREG or
12508 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
12512 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
12514 rtx dstref, srcref, dstreg, srcreg;
12518 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
12520 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
12524 /* Subroutine of above to actually do the updating by recursively walking
12528 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
12530 rtx dstref, srcref, dstreg, srcreg;
12532 enum rtx_code code = GET_CODE (x);
12533 const char *format_ptr = GET_RTX_FORMAT (code);
12536 if (code == MEM && XEXP (x, 0) == dstreg)
12537 MEM_COPY_ATTRIBUTES (x, dstref);
12538 else if (code == MEM && XEXP (x, 0) == srcreg)
12539 MEM_COPY_ATTRIBUTES (x, srcref);
12541 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
12543 if (*format_ptr == 'e')
12544 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
12546 else if (*format_ptr == 'E')
12547 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12548 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
12553 /* Compute the alignment given to a constant that is being placed in memory.
12554 EXP is the constant and ALIGN is the alignment that the object would
12556 The value of this function is used instead of that alignment to align
12560 ix86_constant_alignment (exp, align)
12564 if (TREE_CODE (exp) == REAL_CST)
12566 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12568 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12571 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
12578 /* Compute the alignment for a static variable.
12579 TYPE is the data type, and ALIGN is the alignment that
12580 the object would ordinarily have. The value of this function is used
12581 instead of that alignment to align the object. */
12584 ix86_data_alignment (type, align)
12588 if (AGGREGATE_TYPE_P (type)
12589 && TYPE_SIZE (type)
12590 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12591 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12592 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12595 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12596 to 16byte boundary. */
12599 if (AGGREGATE_TYPE_P (type)
12600 && TYPE_SIZE (type)
12601 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12602 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12603 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12607 if (TREE_CODE (type) == ARRAY_TYPE)
12609 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12611 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12614 else if (TREE_CODE (type) == COMPLEX_TYPE)
12617 if (TYPE_MODE (type) == DCmode && align < 64)
12619 if (TYPE_MODE (type) == XCmode && align < 128)
12622 else if ((TREE_CODE (type) == RECORD_TYPE
12623 || TREE_CODE (type) == UNION_TYPE
12624 || TREE_CODE (type) == QUAL_UNION_TYPE)
12625 && TYPE_FIELDS (type))
12627 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12629 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12632 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12633 || TREE_CODE (type) == INTEGER_TYPE)
12635 if (TYPE_MODE (type) == DFmode && align < 64)
12637 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12644 /* Compute the alignment for a local variable.
12645 TYPE is the data type, and ALIGN is the alignment that
12646 the object would ordinarily have. The value of this macro is used
12647 instead of that alignment to align the object. */
12650 ix86_local_alignment (type, align)
12654 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12655 to 16byte boundary. */
12658 if (AGGREGATE_TYPE_P (type)
12659 && TYPE_SIZE (type)
12660 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12661 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12662 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12665 if (TREE_CODE (type) == ARRAY_TYPE)
12667 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12669 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12672 else if (TREE_CODE (type) == COMPLEX_TYPE)
12674 if (TYPE_MODE (type) == DCmode && align < 64)
12676 if (TYPE_MODE (type) == XCmode && align < 128)
12679 else if ((TREE_CODE (type) == RECORD_TYPE
12680 || TREE_CODE (type) == UNION_TYPE
12681 || TREE_CODE (type) == QUAL_UNION_TYPE)
12682 && TYPE_FIELDS (type))
12684 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12686 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12689 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12690 || TREE_CODE (type) == INTEGER_TYPE)
12693 if (TYPE_MODE (type) == DFmode && align < 64)
12695 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12701 /* Emit RTL insns to initialize the variable parts of a trampoline.
12702 FNADDR is an RTX for the address of the function's pure code.
12703 CXT is an RTX for the static chain value for the function. */
12705 x86_initialize_trampoline (tramp, fnaddr, cxt)
12706 rtx tramp, fnaddr, cxt;
12710 /* Compute offset from the end of the jmp to the target function. */
12711 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12712 plus_constant (tramp, 10),
12713 NULL_RTX, 1, OPTAB_DIRECT);
12714 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12715 gen_int_mode (0xb9, QImode));
12716 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12717 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12718 gen_int_mode (0xe9, QImode));
12719 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12724 /* Try to load address using shorter movl instead of movabs.
12725 We may want to support movq for kernel mode, but kernel does not use
12726 trampolines at the moment. */
12727 if (x86_64_zero_extended_value (fnaddr))
12729 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12730 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12731 gen_int_mode (0xbb41, HImode));
12732 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12733 gen_lowpart (SImode, fnaddr));
12738 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12739 gen_int_mode (0xbb49, HImode));
12740 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12744 /* Load static chain using movabs to r10. */
12745 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12746 gen_int_mode (0xba49, HImode));
12747 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12750 /* Jump to the r11 */
12751 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12752 gen_int_mode (0xff49, HImode));
12753 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12754 gen_int_mode (0xe3, QImode));
12756 if (offset > TRAMPOLINE_SIZE)
12760 #ifdef TRANSFER_FROM_TRAMPOLINE
12761 emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
12762 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12766 #define def_builtin(MASK, NAME, TYPE, CODE) \
12768 if ((MASK) & target_flags \
12769 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
12770 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12771 NULL, NULL_TREE); \
12774 struct builtin_description
12776 const unsigned int mask;
12777 const enum insn_code icode;
12778 const char *const name;
12779 const enum ix86_builtins code;
12780 const enum rtx_code comparison;
12781 const unsigned int flag;
12784 static const struct builtin_description bdesc_comi[] =
12786 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12787 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12788 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12789 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12790 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12791 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12792 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12793 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12794 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12795 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12796 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12797 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12798 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12799 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12800 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12801 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12802 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12803 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12804 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12805 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12806 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12807 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12808 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12809 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12812 static const struct builtin_description bdesc_2arg[] =
12815 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12816 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12817 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12818 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12819 { MASK_SSE, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12820 { MASK_SSE, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12821 { MASK_SSE, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12822 { MASK_SSE, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12824 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12825 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12826 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12827 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12828 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12829 { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12830 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12831 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12832 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12833 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12834 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12835 { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12836 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12837 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12838 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12839 { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12840 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12841 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12842 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12843 { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12845 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12846 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12847 { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12848 { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12850 { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12851 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12852 { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12853 { MASK_SSE, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12855 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12856 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12857 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12858 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12859 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12862 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12863 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12864 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12865 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
12866 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12867 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12868 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12869 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
12871 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12872 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12873 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12874 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12875 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12876 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12877 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12878 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12880 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12881 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12882 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12884 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12885 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12886 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12887 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12889 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12890 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12892 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12893 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12894 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12895 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12896 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12897 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12899 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12900 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12901 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12902 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12904 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12905 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12906 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12907 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12908 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12909 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12912 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12913 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12914 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12916 { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12917 { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12918 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
12920 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12921 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12922 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12923 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12924 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12925 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12927 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12928 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12929 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12930 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12931 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12932 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12934 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12935 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12936 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12937 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12939 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12940 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12943 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12944 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12945 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12946 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12947 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12948 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12949 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12950 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12952 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12953 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12954 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12955 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12956 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12957 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12958 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12959 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12960 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12961 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12962 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12963 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12964 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12965 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12966 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12967 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12968 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12969 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12970 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12971 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12973 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12974 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12975 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12976 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12978 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12979 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12980 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12981 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12983 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12984 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12985 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12988 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12989 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12990 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12991 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12992 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12993 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12994 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12995 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12997 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12998 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12999 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
13000 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
13001 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
13002 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
13003 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
13004 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
13006 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
13007 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
13008 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
13009 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
13011 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
13012 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
13013 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
13014 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
13016 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
13017 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
13019 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
13020 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
13021 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
13022 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
13023 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
13024 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
13026 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
13027 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
13028 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
13029 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
13031 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
13032 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
13033 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
13034 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
13035 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
13036 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
13037 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
13038 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
13040 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
13041 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
13042 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
13044 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
13045 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
13047 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
13048 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
13049 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
13050 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
13051 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
13052 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
13054 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
13055 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
13056 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
13057 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
13058 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
13059 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
13061 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
13062 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
13063 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
13064 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
13066 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
13068 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
13069 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
13070 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
13071 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
13074 { MASK_PNI, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
13075 { MASK_PNI, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
13076 { MASK_PNI, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
13077 { MASK_PNI, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
13078 { MASK_PNI, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
13079 { MASK_PNI, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
13082 static const struct builtin_description bdesc_1arg[] =
13084 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
13085 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
13087 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
13088 { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
13089 { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
13091 { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
13092 { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
13093 { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
13094 { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
13095 { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
13096 { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
13098 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
13099 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
13100 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
13101 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
13103 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
13105 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
13106 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
13108 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
13109 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
13110 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
13111 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
13112 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
13114 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
13116 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
13117 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
13118 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
13119 { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
13121 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
13122 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
13123 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
13125 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
13128 { MASK_PNI, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
13129 { MASK_PNI, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
13130 { MASK_PNI, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 }
13134 ix86_init_builtins ()
13137 ix86_init_mmx_sse_builtins ();
13140 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
13141 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
13144 ix86_init_mmx_sse_builtins ()
13146 const struct builtin_description * d;
13149 tree pchar_type_node = build_pointer_type (char_type_node);
13150 tree pcchar_type_node = build_pointer_type (
13151 build_type_variant (char_type_node, 1, 0));
13152 tree pfloat_type_node = build_pointer_type (float_type_node);
13153 tree pcfloat_type_node = build_pointer_type (
13154 build_type_variant (float_type_node, 1, 0));
13155 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
13156 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
13157 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
13160 tree int_ftype_v4sf_v4sf
13161 = build_function_type_list (integer_type_node,
13162 V4SF_type_node, V4SF_type_node, NULL_TREE);
13163 tree v4si_ftype_v4sf_v4sf
13164 = build_function_type_list (V4SI_type_node,
13165 V4SF_type_node, V4SF_type_node, NULL_TREE);
13166 /* MMX/SSE/integer conversions. */
13167 tree int_ftype_v4sf
13168 = build_function_type_list (integer_type_node,
13169 V4SF_type_node, NULL_TREE);
13170 tree int64_ftype_v4sf
13171 = build_function_type_list (long_long_integer_type_node,
13172 V4SF_type_node, NULL_TREE);
13173 tree int_ftype_v8qi
13174 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
13175 tree v4sf_ftype_v4sf_int
13176 = build_function_type_list (V4SF_type_node,
13177 V4SF_type_node, integer_type_node, NULL_TREE);
13178 tree v4sf_ftype_v4sf_int64
13179 = build_function_type_list (V4SF_type_node,
13180 V4SF_type_node, long_long_integer_type_node,
13182 tree v4sf_ftype_v4sf_v2si
13183 = build_function_type_list (V4SF_type_node,
13184 V4SF_type_node, V2SI_type_node, NULL_TREE);
13185 tree int_ftype_v4hi_int
13186 = build_function_type_list (integer_type_node,
13187 V4HI_type_node, integer_type_node, NULL_TREE);
13188 tree v4hi_ftype_v4hi_int_int
13189 = build_function_type_list (V4HI_type_node, V4HI_type_node,
13190 integer_type_node, integer_type_node,
13192 /* Miscellaneous. */
13193 tree v8qi_ftype_v4hi_v4hi
13194 = build_function_type_list (V8QI_type_node,
13195 V4HI_type_node, V4HI_type_node, NULL_TREE);
13196 tree v4hi_ftype_v2si_v2si
13197 = build_function_type_list (V4HI_type_node,
13198 V2SI_type_node, V2SI_type_node, NULL_TREE);
13199 tree v4sf_ftype_v4sf_v4sf_int
13200 = build_function_type_list (V4SF_type_node,
13201 V4SF_type_node, V4SF_type_node,
13202 integer_type_node, NULL_TREE);
13203 tree v2si_ftype_v4hi_v4hi
13204 = build_function_type_list (V2SI_type_node,
13205 V4HI_type_node, V4HI_type_node, NULL_TREE);
13206 tree v4hi_ftype_v4hi_int
13207 = build_function_type_list (V4HI_type_node,
13208 V4HI_type_node, integer_type_node, NULL_TREE);
13209 tree v4hi_ftype_v4hi_di
13210 = build_function_type_list (V4HI_type_node,
13211 V4HI_type_node, long_long_unsigned_type_node,
13213 tree v2si_ftype_v2si_di
13214 = build_function_type_list (V2SI_type_node,
13215 V2SI_type_node, long_long_unsigned_type_node,
13217 tree void_ftype_void
13218 = build_function_type (void_type_node, void_list_node);
13219 tree void_ftype_unsigned
13220 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
13221 tree void_ftype_unsigned_unsigned
13222 = build_function_type_list (void_type_node, unsigned_type_node,
13223 unsigned_type_node, NULL_TREE);
13224 tree void_ftype_pcvoid_unsigned_unsigned
13225 = build_function_type_list (void_type_node, const_ptr_type_node,
13226 unsigned_type_node, unsigned_type_node,
13228 tree unsigned_ftype_void
13229 = build_function_type (unsigned_type_node, void_list_node);
13231 = build_function_type (long_long_unsigned_type_node, void_list_node);
13232 tree v4sf_ftype_void
13233 = build_function_type (V4SF_type_node, void_list_node);
13234 tree v2si_ftype_v4sf
13235 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
13236 /* Loads/stores. */
13237 tree void_ftype_v8qi_v8qi_pchar
13238 = build_function_type_list (void_type_node,
13239 V8QI_type_node, V8QI_type_node,
13240 pchar_type_node, NULL_TREE);
13241 tree v4sf_ftype_pcfloat
13242 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
13243 /* @@@ the type is bogus */
13244 tree v4sf_ftype_v4sf_pv2si
13245 = build_function_type_list (V4SF_type_node,
13246 V4SF_type_node, pv2si_type_node, NULL_TREE);
13247 tree void_ftype_pv2si_v4sf
13248 = build_function_type_list (void_type_node,
13249 pv2si_type_node, V4SF_type_node, NULL_TREE);
13250 tree void_ftype_pfloat_v4sf
13251 = build_function_type_list (void_type_node,
13252 pfloat_type_node, V4SF_type_node, NULL_TREE);
13253 tree void_ftype_pdi_di
13254 = build_function_type_list (void_type_node,
13255 pdi_type_node, long_long_unsigned_type_node,
13257 tree void_ftype_pv2di_v2di
13258 = build_function_type_list (void_type_node,
13259 pv2di_type_node, V2DI_type_node, NULL_TREE);
13260 /* Normal vector unops. */
13261 tree v4sf_ftype_v4sf
13262 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
13264 /* Normal vector binops. */
13265 tree v4sf_ftype_v4sf_v4sf
13266 = build_function_type_list (V4SF_type_node,
13267 V4SF_type_node, V4SF_type_node, NULL_TREE);
13268 tree v8qi_ftype_v8qi_v8qi
13269 = build_function_type_list (V8QI_type_node,
13270 V8QI_type_node, V8QI_type_node, NULL_TREE);
13271 tree v4hi_ftype_v4hi_v4hi
13272 = build_function_type_list (V4HI_type_node,
13273 V4HI_type_node, V4HI_type_node, NULL_TREE);
13274 tree v2si_ftype_v2si_v2si
13275 = build_function_type_list (V2SI_type_node,
13276 V2SI_type_node, V2SI_type_node, NULL_TREE);
13277 tree di_ftype_di_di
13278 = build_function_type_list (long_long_unsigned_type_node,
13279 long_long_unsigned_type_node,
13280 long_long_unsigned_type_node, NULL_TREE);
13282 tree v2si_ftype_v2sf
13283 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
13284 tree v2sf_ftype_v2si
13285 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
13286 tree v2si_ftype_v2si
13287 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
13288 tree v2sf_ftype_v2sf
13289 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
13290 tree v2sf_ftype_v2sf_v2sf
13291 = build_function_type_list (V2SF_type_node,
13292 V2SF_type_node, V2SF_type_node, NULL_TREE);
13293 tree v2si_ftype_v2sf_v2sf
13294 = build_function_type_list (V2SI_type_node,
13295 V2SF_type_node, V2SF_type_node, NULL_TREE);
13296 tree pint_type_node = build_pointer_type (integer_type_node);
13297 tree pcint_type_node = build_pointer_type (
13298 build_type_variant (integer_type_node, 1, 0));
13299 tree pdouble_type_node = build_pointer_type (double_type_node);
13300 tree pcdouble_type_node = build_pointer_type (
13301 build_type_variant (double_type_node, 1, 0));
13302 tree int_ftype_v2df_v2df
13303 = build_function_type_list (integer_type_node,
13304 V2DF_type_node, V2DF_type_node, NULL_TREE);
13307 = build_function_type (intTI_type_node, void_list_node);
13308 tree v2di_ftype_void
13309 = build_function_type (V2DI_type_node, void_list_node);
13310 tree ti_ftype_ti_ti
13311 = build_function_type_list (intTI_type_node,
13312 intTI_type_node, intTI_type_node, NULL_TREE);
13313 tree void_ftype_pcvoid
13314 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
13316 = build_function_type_list (V2DI_type_node,
13317 long_long_unsigned_type_node, NULL_TREE);
13319 = build_function_type_list (long_long_unsigned_type_node,
13320 V2DI_type_node, NULL_TREE);
13321 tree v4sf_ftype_v4si
13322 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
13323 tree v4si_ftype_v4sf
13324 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
13325 tree v2df_ftype_v4si
13326 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
13327 tree v4si_ftype_v2df
13328 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
13329 tree v2si_ftype_v2df
13330 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
13331 tree v4sf_ftype_v2df
13332 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
13333 tree v2df_ftype_v2si
13334 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
13335 tree v2df_ftype_v4sf
13336 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
13337 tree int_ftype_v2df
13338 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
13339 tree int64_ftype_v2df
13340 = build_function_type_list (long_long_integer_type_node,
13341 V2DF_type_node, NULL_TREE);
13342 tree v2df_ftype_v2df_int
13343 = build_function_type_list (V2DF_type_node,
13344 V2DF_type_node, integer_type_node, NULL_TREE);
13345 tree v2df_ftype_v2df_int64
13346 = build_function_type_list (V2DF_type_node,
13347 V2DF_type_node, long_long_integer_type_node,
13349 tree v4sf_ftype_v4sf_v2df
13350 = build_function_type_list (V4SF_type_node,
13351 V4SF_type_node, V2DF_type_node, NULL_TREE);
13352 tree v2df_ftype_v2df_v4sf
13353 = build_function_type_list (V2DF_type_node,
13354 V2DF_type_node, V4SF_type_node, NULL_TREE);
13355 tree v2df_ftype_v2df_v2df_int
13356 = build_function_type_list (V2DF_type_node,
13357 V2DF_type_node, V2DF_type_node,
13360 tree v2df_ftype_v2df_pv2si
13361 = build_function_type_list (V2DF_type_node,
13362 V2DF_type_node, pv2si_type_node, NULL_TREE);
13363 tree void_ftype_pv2si_v2df
13364 = build_function_type_list (void_type_node,
13365 pv2si_type_node, V2DF_type_node, NULL_TREE);
13366 tree void_ftype_pdouble_v2df
13367 = build_function_type_list (void_type_node,
13368 pdouble_type_node, V2DF_type_node, NULL_TREE);
13369 tree void_ftype_pint_int
13370 = build_function_type_list (void_type_node,
13371 pint_type_node, integer_type_node, NULL_TREE);
13372 tree void_ftype_v16qi_v16qi_pchar
13373 = build_function_type_list (void_type_node,
13374 V16QI_type_node, V16QI_type_node,
13375 pchar_type_node, NULL_TREE);
13376 tree v2df_ftype_pcdouble
13377 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
13378 tree v2df_ftype_v2df_v2df
13379 = build_function_type_list (V2DF_type_node,
13380 V2DF_type_node, V2DF_type_node, NULL_TREE);
13381 tree v16qi_ftype_v16qi_v16qi
13382 = build_function_type_list (V16QI_type_node,
13383 V16QI_type_node, V16QI_type_node, NULL_TREE);
13384 tree v8hi_ftype_v8hi_v8hi
13385 = build_function_type_list (V8HI_type_node,
13386 V8HI_type_node, V8HI_type_node, NULL_TREE);
13387 tree v4si_ftype_v4si_v4si
13388 = build_function_type_list (V4SI_type_node,
13389 V4SI_type_node, V4SI_type_node, NULL_TREE);
13390 tree v2di_ftype_v2di_v2di
13391 = build_function_type_list (V2DI_type_node,
13392 V2DI_type_node, V2DI_type_node, NULL_TREE);
13393 tree v2di_ftype_v2df_v2df
13394 = build_function_type_list (V2DI_type_node,
13395 V2DF_type_node, V2DF_type_node, NULL_TREE);
13396 tree v2df_ftype_v2df
13397 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13398 tree v2df_ftype_double
13399 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
13400 tree v2df_ftype_double_double
13401 = build_function_type_list (V2DF_type_node,
13402 double_type_node, double_type_node, NULL_TREE);
13403 tree int_ftype_v8hi_int
13404 = build_function_type_list (integer_type_node,
13405 V8HI_type_node, integer_type_node, NULL_TREE);
13406 tree v8hi_ftype_v8hi_int_int
13407 = build_function_type_list (V8HI_type_node,
13408 V8HI_type_node, integer_type_node,
13409 integer_type_node, NULL_TREE);
13410 tree v2di_ftype_v2di_int
13411 = build_function_type_list (V2DI_type_node,
13412 V2DI_type_node, integer_type_node, NULL_TREE);
13413 tree v4si_ftype_v4si_int
13414 = build_function_type_list (V4SI_type_node,
13415 V4SI_type_node, integer_type_node, NULL_TREE);
13416 tree v8hi_ftype_v8hi_int
13417 = build_function_type_list (V8HI_type_node,
13418 V8HI_type_node, integer_type_node, NULL_TREE);
13419 tree v8hi_ftype_v8hi_v2di
13420 = build_function_type_list (V8HI_type_node,
13421 V8HI_type_node, V2DI_type_node, NULL_TREE);
13422 tree v4si_ftype_v4si_v2di
13423 = build_function_type_list (V4SI_type_node,
13424 V4SI_type_node, V2DI_type_node, NULL_TREE);
13425 tree v4si_ftype_v8hi_v8hi
13426 = build_function_type_list (V4SI_type_node,
13427 V8HI_type_node, V8HI_type_node, NULL_TREE);
13428 tree di_ftype_v8qi_v8qi
13429 = build_function_type_list (long_long_unsigned_type_node,
13430 V8QI_type_node, V8QI_type_node, NULL_TREE);
13431 tree v2di_ftype_v16qi_v16qi
13432 = build_function_type_list (V2DI_type_node,
13433 V16QI_type_node, V16QI_type_node, NULL_TREE);
13434 tree int_ftype_v16qi
13435 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
13436 tree v16qi_ftype_pcchar
13437 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
13438 tree void_ftype_pchar_v16qi
13439 = build_function_type_list (void_type_node,
13440 pchar_type_node, V16QI_type_node, NULL_TREE);
13441 tree v4si_ftype_pcint
13442 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
13443 tree void_ftype_pcint_v4si
13444 = build_function_type_list (void_type_node,
13445 pcint_type_node, V4SI_type_node, NULL_TREE);
13446 tree v2di_ftype_v2di
13447 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
13449 /* Add all builtins that are more or less simple operations on two
13451 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13453 /* Use one of the operands; the target can have a different mode for
13454 mask-generating compares. */
13455 enum machine_mode mode;
13460 mode = insn_data[d->icode].operand[1].mode;
13465 type = v16qi_ftype_v16qi_v16qi;
13468 type = v8hi_ftype_v8hi_v8hi;
13471 type = v4si_ftype_v4si_v4si;
13474 type = v2di_ftype_v2di_v2di;
13477 type = v2df_ftype_v2df_v2df;
13480 type = ti_ftype_ti_ti;
13483 type = v4sf_ftype_v4sf_v4sf;
13486 type = v8qi_ftype_v8qi_v8qi;
13489 type = v4hi_ftype_v4hi_v4hi;
13492 type = v2si_ftype_v2si_v2si;
13495 type = di_ftype_di_di;
13502 /* Override for comparisons. */
13503 if (d->icode == CODE_FOR_maskcmpv4sf3
13504 || d->icode == CODE_FOR_maskncmpv4sf3
13505 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13506 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
13507 type = v4si_ftype_v4sf_v4sf;
13509 if (d->icode == CODE_FOR_maskcmpv2df3
13510 || d->icode == CODE_FOR_maskncmpv2df3
13511 || d->icode == CODE_FOR_vmmaskcmpv2df3
13512 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13513 type = v2di_ftype_v2df_v2df;
13515 def_builtin (d->mask, d->name, type, d->code);
13518 /* Add the remaining MMX insns with somewhat more complicated types. */
13519 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13520 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13521 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13522 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13523 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13525 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13526 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13527 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13529 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13530 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13532 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13533 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
13535 /* comi/ucomi insns. */
13536 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13537 if (d->mask == MASK_SSE2)
13538 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13540 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
13542 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13543 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13544 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13546 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13547 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13548 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13549 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13550 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13551 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
13552 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13553 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
13554 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13555 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13556 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
13558 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13559 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13561 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13563 def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13564 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13565 def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13566 def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13567 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13568 def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13570 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13571 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13572 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13573 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13575 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13576 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13577 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13578 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13580 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13582 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13584 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13585 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13586 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13587 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13588 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13589 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13591 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13593 /* Original 3DNow! */
13594 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13595 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13596 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13597 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13598 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13599 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13600 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13601 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13602 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13603 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13604 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13605 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13606 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13607 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13608 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13609 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13610 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13611 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13612 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13613 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13615 /* 3DNow! extension as used in the Athlon CPU. */
13616 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13617 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13618 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13619 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13620 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13621 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13623 def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13626 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13627 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13629 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13630 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13631 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13633 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13634 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13635 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
13636 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13637 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13638 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13640 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13641 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13642 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13643 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13645 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13646 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13647 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13648 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13649 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13651 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13652 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13653 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13654 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13656 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13657 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13659 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13661 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13662 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13664 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13665 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13666 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13667 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13668 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13670 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13672 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13673 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13674 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13675 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
13677 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13678 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13679 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13681 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13682 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
13683 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13684 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13686 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13687 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13688 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13689 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13690 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13691 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13692 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13694 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13695 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13696 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13698 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13699 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13700 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13701 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13702 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13703 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13704 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13706 def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13708 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13709 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13710 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13712 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13713 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13714 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13716 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13717 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13719 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13720 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13721 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13722 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13724 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13725 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13726 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13727 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13729 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13730 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13732 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13734 /* Prescott New Instructions. */
13735 def_builtin (MASK_PNI, "__builtin_ia32_monitor",
13736 void_ftype_pcvoid_unsigned_unsigned,
13737 IX86_BUILTIN_MONITOR);
13738 def_builtin (MASK_PNI, "__builtin_ia32_mwait",
13739 void_ftype_unsigned_unsigned,
13740 IX86_BUILTIN_MWAIT);
13741 def_builtin (MASK_PNI, "__builtin_ia32_movshdup",
13743 IX86_BUILTIN_MOVSHDUP);
13744 def_builtin (MASK_PNI, "__builtin_ia32_movsldup",
13746 IX86_BUILTIN_MOVSLDUP);
13747 def_builtin (MASK_PNI, "__builtin_ia32_lddqu",
13748 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
13749 def_builtin (MASK_PNI, "__builtin_ia32_loadddup",
13750 v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
13751 def_builtin (MASK_PNI, "__builtin_ia32_movddup",
13752 v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
13755 /* Errors in the source file can cause expand_expr to return const0_rtx
13756 where we expect a vector. To avoid crashing, use one of the vector
13757 clear instructions. */
13759 safe_vector_operand (x, mode)
13761 enum machine_mode mode;
13763 if (x != const0_rtx)
13765 x = gen_reg_rtx (mode);
13767 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13768 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13769 : gen_rtx_SUBREG (DImode, x, 0)));
13771 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13772 : gen_rtx_SUBREG (V4SFmode, x, 0),
13773 CONST0_RTX (V4SFmode)));
13777 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
13780 ix86_expand_binop_builtin (icode, arglist, target)
13781 enum insn_code icode;
13786 tree arg0 = TREE_VALUE (arglist);
13787 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13788 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13789 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13790 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13791 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13792 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13794 if (VECTOR_MODE_P (mode0))
13795 op0 = safe_vector_operand (op0, mode0);
13796 if (VECTOR_MODE_P (mode1))
13797 op1 = safe_vector_operand (op1, mode1);
13800 || GET_MODE (target) != tmode
13801 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13802 target = gen_reg_rtx (tmode);
13804 if (GET_MODE (op1) == SImode && mode1 == TImode)
13806 rtx x = gen_reg_rtx (V4SImode);
13807 emit_insn (gen_sse2_loadd (x, op1));
13808 op1 = gen_lowpart (TImode, x);
13811 /* In case the insn wants input operands in modes different from
13812 the result, abort. */
13813 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
13816 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13817 op0 = copy_to_mode_reg (mode0, op0);
13818 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13819 op1 = copy_to_mode_reg (mode1, op1);
13821 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13822 yet one of the two must not be a memory. This is normally enforced
13823 by expanders, but we didn't bother to create one here. */
13824 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13825 op0 = copy_to_mode_reg (mode0, op0);
13827 pat = GEN_FCN (icode) (target, op0, op1);
13834 /* Subroutine of ix86_expand_builtin to take care of stores. */
13837 ix86_expand_store_builtin (icode, arglist)
13838 enum insn_code icode;
13842 tree arg0 = TREE_VALUE (arglist);
13843 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13844 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13845 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13846 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13847 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13849 if (VECTOR_MODE_P (mode1))
13850 op1 = safe_vector_operand (op1, mode1);
13852 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13853 op1 = copy_to_mode_reg (mode1, op1);
13855 pat = GEN_FCN (icode) (op0, op1);
13861 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
13864 ix86_expand_unop_builtin (icode, arglist, target, do_load)
13865 enum insn_code icode;
13871 tree arg0 = TREE_VALUE (arglist);
13872 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13873 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13874 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13877 || GET_MODE (target) != tmode
13878 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13879 target = gen_reg_rtx (tmode);
13881 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13884 if (VECTOR_MODE_P (mode0))
13885 op0 = safe_vector_operand (op0, mode0);
13887 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13888 op0 = copy_to_mode_reg (mode0, op0);
13891 pat = GEN_FCN (icode) (target, op0);
13898 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13899 sqrtss, rsqrtss, rcpss. */
13902 ix86_expand_unop1_builtin (icode, arglist, target)
13903 enum insn_code icode;
13908 tree arg0 = TREE_VALUE (arglist);
13909 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13910 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13911 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13914 || GET_MODE (target) != tmode
13915 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13916 target = gen_reg_rtx (tmode);
13918 if (VECTOR_MODE_P (mode0))
13919 op0 = safe_vector_operand (op0, mode0);
13921 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13922 op0 = copy_to_mode_reg (mode0, op0);
13925 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13926 op1 = copy_to_mode_reg (mode0, op1);
13928 pat = GEN_FCN (icode) (target, op0, op1);
13935 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13938 ix86_expand_sse_compare (d, arglist, target)
13939 const struct builtin_description *d;
13944 tree arg0 = TREE_VALUE (arglist);
13945 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13946 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13947 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13949 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13950 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13951 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13952 enum rtx_code comparison = d->comparison;
13954 if (VECTOR_MODE_P (mode0))
13955 op0 = safe_vector_operand (op0, mode0);
13956 if (VECTOR_MODE_P (mode1))
13957 op1 = safe_vector_operand (op1, mode1);
13959 /* Swap operands if we have a comparison that isn't available in
13963 rtx tmp = gen_reg_rtx (mode1);
13964 emit_move_insn (tmp, op1);
13970 || GET_MODE (target) != tmode
13971 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13972 target = gen_reg_rtx (tmode);
13974 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13975 op0 = copy_to_mode_reg (mode0, op0);
13976 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13977 op1 = copy_to_mode_reg (mode1, op1);
13979 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13980 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13987 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
13990 ix86_expand_sse_comi (d, arglist, target)
13991 const struct builtin_description *d;
13996 tree arg0 = TREE_VALUE (arglist);
13997 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13998 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13999 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14001 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
14002 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
14003 enum rtx_code comparison = d->comparison;
14005 if (VECTOR_MODE_P (mode0))
14006 op0 = safe_vector_operand (op0, mode0);
14007 if (VECTOR_MODE_P (mode1))
14008 op1 = safe_vector_operand (op1, mode1);
14010 /* Swap operands if we have a comparison that isn't available in
14019 target = gen_reg_rtx (SImode);
14020 emit_move_insn (target, const0_rtx);
14021 target = gen_rtx_SUBREG (QImode, target, 0);
14023 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
14024 op0 = copy_to_mode_reg (mode0, op0);
14025 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
14026 op1 = copy_to_mode_reg (mode1, op1);
14028 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
14029 pat = GEN_FCN (d->icode) (op0, op1);
14033 emit_insn (gen_rtx_SET (VOIDmode,
14034 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
14035 gen_rtx_fmt_ee (comparison, QImode,
14039 return SUBREG_REG (target);
14042 /* Expand an expression EXP that calls a built-in function,
14043 with result going to TARGET if that's convenient
14044 (and in mode MODE if that's convenient).
14045 SUBTARGET may be used as the target for computing one of EXP's operands.
14046 IGNORE is nonzero if the value is to be ignored. */
14049 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
14052 rtx subtarget ATTRIBUTE_UNUSED;
14053 enum machine_mode mode ATTRIBUTE_UNUSED;
14054 int ignore ATTRIBUTE_UNUSED;
14056 const struct builtin_description *d;
14058 enum insn_code icode;
14059 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
14060 tree arglist = TREE_OPERAND (exp, 1);
14061 tree arg0, arg1, arg2;
14062 rtx op0, op1, op2, pat;
14063 enum machine_mode tmode, mode0, mode1, mode2;
14064 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
14068 case IX86_BUILTIN_EMMS:
14069 emit_insn (gen_emms ());
14072 case IX86_BUILTIN_SFENCE:
14073 emit_insn (gen_sfence ());
14076 case IX86_BUILTIN_PEXTRW:
14077 case IX86_BUILTIN_PEXTRW128:
14078 icode = (fcode == IX86_BUILTIN_PEXTRW
14079 ? CODE_FOR_mmx_pextrw
14080 : CODE_FOR_sse2_pextrw);
14081 arg0 = TREE_VALUE (arglist);
14082 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14083 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14084 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14085 tmode = insn_data[icode].operand[0].mode;
14086 mode0 = insn_data[icode].operand[1].mode;
14087 mode1 = insn_data[icode].operand[2].mode;
14089 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14090 op0 = copy_to_mode_reg (mode0, op0);
14091 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14093 /* @@@ better error message */
14094 error ("selector must be an immediate");
14095 return gen_reg_rtx (tmode);
14098 || GET_MODE (target) != tmode
14099 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14100 target = gen_reg_rtx (tmode);
14101 pat = GEN_FCN (icode) (target, op0, op1);
14107 case IX86_BUILTIN_PINSRW:
14108 case IX86_BUILTIN_PINSRW128:
14109 icode = (fcode == IX86_BUILTIN_PINSRW
14110 ? CODE_FOR_mmx_pinsrw
14111 : CODE_FOR_sse2_pinsrw);
14112 arg0 = TREE_VALUE (arglist);
14113 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14114 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14115 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14116 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14117 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14118 tmode = insn_data[icode].operand[0].mode;
14119 mode0 = insn_data[icode].operand[1].mode;
14120 mode1 = insn_data[icode].operand[2].mode;
14121 mode2 = insn_data[icode].operand[3].mode;
14123 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14124 op0 = copy_to_mode_reg (mode0, op0);
14125 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14126 op1 = copy_to_mode_reg (mode1, op1);
14127 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14129 /* @@@ better error message */
14130 error ("selector must be an immediate");
14134 || GET_MODE (target) != tmode
14135 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14136 target = gen_reg_rtx (tmode);
14137 pat = GEN_FCN (icode) (target, op0, op1, op2);
14143 case IX86_BUILTIN_MASKMOVQ:
14144 case IX86_BUILTIN_MASKMOVDQU:
14145 icode = (fcode == IX86_BUILTIN_MASKMOVQ
14146 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
14147 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
14148 : CODE_FOR_sse2_maskmovdqu));
14149 /* Note the arg order is different from the operand order. */
14150 arg1 = TREE_VALUE (arglist);
14151 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
14152 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14153 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14154 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14155 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14156 mode0 = insn_data[icode].operand[0].mode;
14157 mode1 = insn_data[icode].operand[1].mode;
14158 mode2 = insn_data[icode].operand[2].mode;
14160 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14161 op0 = copy_to_mode_reg (mode0, op0);
14162 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14163 op1 = copy_to_mode_reg (mode1, op1);
14164 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
14165 op2 = copy_to_mode_reg (mode2, op2);
14166 pat = GEN_FCN (icode) (op0, op1, op2);
14172 case IX86_BUILTIN_SQRTSS:
14173 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
14174 case IX86_BUILTIN_RSQRTSS:
14175 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
14176 case IX86_BUILTIN_RCPSS:
14177 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
14179 case IX86_BUILTIN_LOADAPS:
14180 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
14182 case IX86_BUILTIN_LOADUPS:
14183 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
14185 case IX86_BUILTIN_STOREAPS:
14186 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
14188 case IX86_BUILTIN_STOREUPS:
14189 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
14191 case IX86_BUILTIN_LOADSS:
14192 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
14194 case IX86_BUILTIN_STORESS:
14195 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
14197 case IX86_BUILTIN_LOADHPS:
14198 case IX86_BUILTIN_LOADLPS:
14199 case IX86_BUILTIN_LOADHPD:
14200 case IX86_BUILTIN_LOADLPD:
14201 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
14202 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
14203 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
14204 : CODE_FOR_sse2_movlpd);
14205 arg0 = TREE_VALUE (arglist);
14206 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14207 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14208 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14209 tmode = insn_data[icode].operand[0].mode;
14210 mode0 = insn_data[icode].operand[1].mode;
14211 mode1 = insn_data[icode].operand[2].mode;
14213 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14214 op0 = copy_to_mode_reg (mode0, op0);
14215 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
14217 || GET_MODE (target) != tmode
14218 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14219 target = gen_reg_rtx (tmode);
14220 pat = GEN_FCN (icode) (target, op0, op1);
14226 case IX86_BUILTIN_STOREHPS:
14227 case IX86_BUILTIN_STORELPS:
14228 case IX86_BUILTIN_STOREHPD:
14229 case IX86_BUILTIN_STORELPD:
14230 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
14231 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
14232 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
14233 : CODE_FOR_sse2_movlpd);
14234 arg0 = TREE_VALUE (arglist);
14235 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14236 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14237 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14238 mode0 = insn_data[icode].operand[1].mode;
14239 mode1 = insn_data[icode].operand[2].mode;
14241 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14242 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14243 op1 = copy_to_mode_reg (mode1, op1);
14245 pat = GEN_FCN (icode) (op0, op0, op1);
14251 case IX86_BUILTIN_MOVNTPS:
14252 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
14253 case IX86_BUILTIN_MOVNTQ:
14254 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
14256 case IX86_BUILTIN_LDMXCSR:
14257 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14258 target = assign_386_stack_local (SImode, 0);
14259 emit_move_insn (target, op0);
14260 emit_insn (gen_ldmxcsr (target));
14263 case IX86_BUILTIN_STMXCSR:
14264 target = assign_386_stack_local (SImode, 0);
14265 emit_insn (gen_stmxcsr (target));
14266 return copy_to_mode_reg (SImode, target);
14268 case IX86_BUILTIN_SHUFPS:
14269 case IX86_BUILTIN_SHUFPD:
14270 icode = (fcode == IX86_BUILTIN_SHUFPS
14271 ? CODE_FOR_sse_shufps
14272 : CODE_FOR_sse2_shufpd);
14273 arg0 = TREE_VALUE (arglist);
14274 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14275 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14276 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14277 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14278 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14279 tmode = insn_data[icode].operand[0].mode;
14280 mode0 = insn_data[icode].operand[1].mode;
14281 mode1 = insn_data[icode].operand[2].mode;
14282 mode2 = insn_data[icode].operand[3].mode;
14284 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14285 op0 = copy_to_mode_reg (mode0, op0);
14286 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14287 op1 = copy_to_mode_reg (mode1, op1);
14288 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14290 /* @@@ better error message */
14291 error ("mask must be an immediate");
14292 return gen_reg_rtx (tmode);
14295 || GET_MODE (target) != tmode
14296 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14297 target = gen_reg_rtx (tmode);
14298 pat = GEN_FCN (icode) (target, op0, op1, op2);
14304 case IX86_BUILTIN_PSHUFW:
14305 case IX86_BUILTIN_PSHUFD:
14306 case IX86_BUILTIN_PSHUFHW:
14307 case IX86_BUILTIN_PSHUFLW:
14308 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14309 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14310 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14311 : CODE_FOR_mmx_pshufw);
14312 arg0 = TREE_VALUE (arglist);
14313 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14314 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14315 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14316 tmode = insn_data[icode].operand[0].mode;
14317 mode1 = insn_data[icode].operand[1].mode;
14318 mode2 = insn_data[icode].operand[2].mode;
14320 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14321 op0 = copy_to_mode_reg (mode1, op0);
14322 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14324 /* @@@ better error message */
14325 error ("mask must be an immediate");
14329 || GET_MODE (target) != tmode
14330 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14331 target = gen_reg_rtx (tmode);
14332 pat = GEN_FCN (icode) (target, op0, op1);
14338 case IX86_BUILTIN_PSLLDQI128:
14339 case IX86_BUILTIN_PSRLDQI128:
14340 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14341 : CODE_FOR_sse2_lshrti3);
14342 arg0 = TREE_VALUE (arglist);
14343 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14344 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14345 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14346 tmode = insn_data[icode].operand[0].mode;
14347 mode1 = insn_data[icode].operand[1].mode;
14348 mode2 = insn_data[icode].operand[2].mode;
14350 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14352 op0 = copy_to_reg (op0);
14353 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14355 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14357 error ("shift must be an immediate");
14360 target = gen_reg_rtx (V2DImode);
14361 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14367 case IX86_BUILTIN_FEMMS:
14368 emit_insn (gen_femms ());
14371 case IX86_BUILTIN_PAVGUSB:
14372 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
14374 case IX86_BUILTIN_PF2ID:
14375 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
14377 case IX86_BUILTIN_PFACC:
14378 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
14380 case IX86_BUILTIN_PFADD:
14381 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
14383 case IX86_BUILTIN_PFCMPEQ:
14384 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
14386 case IX86_BUILTIN_PFCMPGE:
14387 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
14389 case IX86_BUILTIN_PFCMPGT:
14390 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
14392 case IX86_BUILTIN_PFMAX:
14393 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
14395 case IX86_BUILTIN_PFMIN:
14396 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
14398 case IX86_BUILTIN_PFMUL:
14399 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
14401 case IX86_BUILTIN_PFRCP:
14402 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
14404 case IX86_BUILTIN_PFRCPIT1:
14405 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
14407 case IX86_BUILTIN_PFRCPIT2:
14408 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
14410 case IX86_BUILTIN_PFRSQIT1:
14411 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
14413 case IX86_BUILTIN_PFRSQRT:
14414 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
14416 case IX86_BUILTIN_PFSUB:
14417 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
14419 case IX86_BUILTIN_PFSUBR:
14420 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
14422 case IX86_BUILTIN_PI2FD:
14423 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
14425 case IX86_BUILTIN_PMULHRW:
14426 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
14428 case IX86_BUILTIN_PF2IW:
14429 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
14431 case IX86_BUILTIN_PFNACC:
14432 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
14434 case IX86_BUILTIN_PFPNACC:
14435 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
14437 case IX86_BUILTIN_PI2FW:
14438 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
14440 case IX86_BUILTIN_PSWAPDSI:
14441 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
14443 case IX86_BUILTIN_PSWAPDSF:
14444 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
14446 case IX86_BUILTIN_SSE_ZERO:
14447 target = gen_reg_rtx (V4SFmode);
14448 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
14451 case IX86_BUILTIN_MMX_ZERO:
14452 target = gen_reg_rtx (DImode);
14453 emit_insn (gen_mmx_clrdi (target));
14456 case IX86_BUILTIN_CLRTI:
14457 target = gen_reg_rtx (V2DImode);
14458 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
14462 case IX86_BUILTIN_SQRTSD:
14463 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14464 case IX86_BUILTIN_LOADAPD:
14465 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14466 case IX86_BUILTIN_LOADUPD:
14467 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14469 case IX86_BUILTIN_STOREAPD:
14470 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14471 case IX86_BUILTIN_STOREUPD:
14472 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14474 case IX86_BUILTIN_LOADSD:
14475 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14477 case IX86_BUILTIN_STORESD:
14478 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
14480 case IX86_BUILTIN_SETPD1:
14481 target = assign_386_stack_local (DFmode, 0);
14482 arg0 = TREE_VALUE (arglist);
14483 emit_move_insn (adjust_address (target, DFmode, 0),
14484 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14485 op0 = gen_reg_rtx (V2DFmode);
14486 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
14487 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
14490 case IX86_BUILTIN_SETPD:
14491 target = assign_386_stack_local (V2DFmode, 0);
14492 arg0 = TREE_VALUE (arglist);
14493 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14494 emit_move_insn (adjust_address (target, DFmode, 0),
14495 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14496 emit_move_insn (adjust_address (target, DFmode, 8),
14497 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14498 op0 = gen_reg_rtx (V2DFmode);
14499 emit_insn (gen_sse2_movapd (op0, target));
14502 case IX86_BUILTIN_LOADRPD:
14503 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14504 gen_reg_rtx (V2DFmode), 1);
14505 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
14508 case IX86_BUILTIN_LOADPD1:
14509 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14510 gen_reg_rtx (V2DFmode), 1);
14511 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14514 case IX86_BUILTIN_STOREPD1:
14515 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14516 case IX86_BUILTIN_STORERPD:
14517 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14519 case IX86_BUILTIN_CLRPD:
14520 target = gen_reg_rtx (V2DFmode);
14521 emit_insn (gen_sse_clrv2df (target));
14524 case IX86_BUILTIN_MFENCE:
14525 emit_insn (gen_sse2_mfence ());
14527 case IX86_BUILTIN_LFENCE:
14528 emit_insn (gen_sse2_lfence ());
14531 case IX86_BUILTIN_CLFLUSH:
14532 arg0 = TREE_VALUE (arglist);
14533 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14534 icode = CODE_FOR_sse2_clflush;
14535 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14536 op0 = copy_to_mode_reg (Pmode, op0);
14538 emit_insn (gen_sse2_clflush (op0));
14541 case IX86_BUILTIN_MOVNTPD:
14542 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14543 case IX86_BUILTIN_MOVNTDQ:
14544 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14545 case IX86_BUILTIN_MOVNTI:
14546 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14548 case IX86_BUILTIN_LOADDQA:
14549 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14550 case IX86_BUILTIN_LOADDQU:
14551 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14552 case IX86_BUILTIN_LOADD:
14553 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14555 case IX86_BUILTIN_STOREDQA:
14556 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14557 case IX86_BUILTIN_STOREDQU:
14558 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14559 case IX86_BUILTIN_STORED:
14560 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
14562 case IX86_BUILTIN_MONITOR:
14563 arg0 = TREE_VALUE (arglist);
14564 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14565 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14566 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14567 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14568 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14570 op0 = copy_to_mode_reg (SImode, op0);
14572 op1 = copy_to_mode_reg (SImode, op1);
14574 op2 = copy_to_mode_reg (SImode, op2);
14575 emit_insn (gen_monitor (op0, op1, op2));
14578 case IX86_BUILTIN_MWAIT:
14579 arg0 = TREE_VALUE (arglist);
14580 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14581 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14582 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14584 op0 = copy_to_mode_reg (SImode, op0);
14586 op1 = copy_to_mode_reg (SImode, op1);
14587 emit_insn (gen_mwait (op0, op1));
14590 case IX86_BUILTIN_LOADDDUP:
14591 return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
14593 case IX86_BUILTIN_LDDQU:
14594 return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
14601 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14602 if (d->code == fcode)
14604 /* Compares are treated specially. */
14605 if (d->icode == CODE_FOR_maskcmpv4sf3
14606 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14607 || d->icode == CODE_FOR_maskncmpv4sf3
14608 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14609 || d->icode == CODE_FOR_maskcmpv2df3
14610 || d->icode == CODE_FOR_vmmaskcmpv2df3
14611 || d->icode == CODE_FOR_maskncmpv2df3
14612 || d->icode == CODE_FOR_vmmaskncmpv2df3)
14613 return ix86_expand_sse_compare (d, arglist, target);
14615 return ix86_expand_binop_builtin (d->icode, arglist, target);
14618 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14619 if (d->code == fcode)
14620 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14622 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14623 if (d->code == fcode)
14624 return ix86_expand_sse_comi (d, arglist, target);
14626 /* @@@ Should really do something sensible here. */
14630 /* Store OPERAND to the memory after reload is completed. This means
14631 that we can't easily use assign_stack_local. */
14633 ix86_force_to_memory (mode, operand)
14634 enum machine_mode mode;
14638 if (!reload_completed)
14640 if (TARGET_RED_ZONE)
14642 result = gen_rtx_MEM (mode,
14643 gen_rtx_PLUS (Pmode,
14645 GEN_INT (-RED_ZONE_SIZE)));
14646 emit_move_insn (result, operand);
14648 else if (!TARGET_RED_ZONE && TARGET_64BIT)
14654 operand = gen_lowpart (DImode, operand);
14658 gen_rtx_SET (VOIDmode,
14659 gen_rtx_MEM (DImode,
14660 gen_rtx_PRE_DEC (DImode,
14661 stack_pointer_rtx)),
14667 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14676 split_di (&operand, 1, operands, operands + 1);
14678 gen_rtx_SET (VOIDmode,
14679 gen_rtx_MEM (SImode,
14680 gen_rtx_PRE_DEC (Pmode,
14681 stack_pointer_rtx)),
14684 gen_rtx_SET (VOIDmode,
14685 gen_rtx_MEM (SImode,
14686 gen_rtx_PRE_DEC (Pmode,
14687 stack_pointer_rtx)),
14692 /* It is better to store HImodes as SImodes. */
14693 if (!TARGET_PARTIAL_REG_STALL)
14694 operand = gen_lowpart (SImode, operand);
14698 gen_rtx_SET (VOIDmode,
14699 gen_rtx_MEM (GET_MODE (operand),
14700 gen_rtx_PRE_DEC (SImode,
14701 stack_pointer_rtx)),
14707 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14712 /* Free operand from the memory. */
14714 ix86_free_from_memory (mode)
14715 enum machine_mode mode;
14717 if (!TARGET_RED_ZONE)
14721 if (mode == DImode || TARGET_64BIT)
14723 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14727 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14728 to pop or add instruction if registers are available. */
14729 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14730 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14735 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14736 QImode must go into class Q_REGS.
14737 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
14738 movdf to do mem-to-mem moves through integer regs. */
14740 ix86_preferred_reload_class (x, class)
14742 enum reg_class class;
14744 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14746 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14748 /* SSE can't load any constant directly yet. */
14749 if (SSE_CLASS_P (class))
14751 /* Floats can load 0 and 1. */
14752 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14754 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14755 if (MAYBE_SSE_CLASS_P (class))
14756 return (reg_class_subset_p (class, GENERAL_REGS)
14757 ? GENERAL_REGS : FLOAT_REGS);
14761 /* General regs can load everything. */
14762 if (reg_class_subset_p (class, GENERAL_REGS))
14763 return GENERAL_REGS;
14764 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14765 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14768 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14770 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14775 /* If we are copying between general and FP registers, we need a memory
14776 location. The same is true for SSE and MMX registers.
14778 The macro can't work reliably when one of the CLASSES is class containing
14779 registers from multiple units (SSE, MMX, integer). We avoid this by never
14780 combining those units in single alternative in the machine description.
14781 Ensure that this constraint holds to avoid unexpected surprises.
14783 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14784 enforce these sanity checks. */
14786 ix86_secondary_memory_needed (class1, class2, mode, strict)
14787 enum reg_class class1, class2;
14788 enum machine_mode mode;
14791 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14792 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14793 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14794 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14795 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14796 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14803 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14804 || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14805 || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14806 && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14807 || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
14809 /* Return the cost of moving data from a register in class CLASS1 to
14810 one in class CLASS2.
14812 It is not required that the cost always equal 2 when FROM is the same as TO;
14813 on some machines it is expensive to move between registers if they are not
14814 general registers. */
14816 ix86_register_move_cost (mode, class1, class2)
14817 enum machine_mode mode;
14818 enum reg_class class1, class2;
14820 /* In case we require secondary memory, compute cost of the store followed
14821 by load. In order to avoid bad register allocation choices, we need
14822 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14824 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14828 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14829 MEMORY_MOVE_COST (mode, class1, 1));
14830 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14831 MEMORY_MOVE_COST (mode, class2, 1));
14833 /* In case of copying from general_purpose_register we may emit multiple
14834 stores followed by single load causing memory size mismatch stall.
14835 Count this as arbitrarily high cost of 20. */
14836 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14839 /* In the case of FP/MMX moves, the registers actually overlap, and we
14840 have to switch modes in order to treat them differently. */
14841 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14842 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14848 /* Moves between SSE/MMX and integer unit are expensive. */
14849 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14850 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14851 return ix86_cost->mmxsse_to_integer;
14852 if (MAYBE_FLOAT_CLASS_P (class1))
14853 return ix86_cost->fp_move;
14854 if (MAYBE_SSE_CLASS_P (class1))
14855 return ix86_cost->sse_move;
14856 if (MAYBE_MMX_CLASS_P (class1))
14857 return ix86_cost->mmx_move;
14861 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14863 ix86_hard_regno_mode_ok (regno, mode)
14865 enum machine_mode mode;
14867 /* Flags and only flags can only hold CCmode values. */
14868 if (CC_REGNO_P (regno))
14869 return GET_MODE_CLASS (mode) == MODE_CC;
14870 if (GET_MODE_CLASS (mode) == MODE_CC
14871 || GET_MODE_CLASS (mode) == MODE_RANDOM
14872 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14874 if (FP_REGNO_P (regno))
14875 return VALID_FP_MODE_P (mode);
14876 if (SSE_REGNO_P (regno))
14877 return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
14878 if (MMX_REGNO_P (regno))
14880 ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
14881 /* We handle both integer and floats in the general purpose registers.
14882 In future we should be able to handle vector modes as well. */
14883 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14885 /* Take care for QImode values - they can be in non-QI regs, but then
14886 they do cause partial register stalls. */
14887 if (regno < 4 || mode != QImode || TARGET_64BIT)
14889 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14892 /* Return the cost of moving data of mode M between a
14893 register and memory. A value of 2 is the default; this cost is
14894 relative to those in `REGISTER_MOVE_COST'.
14896 If moving between registers and memory is more expensive than
14897 between two registers, you should define this macro to express the
14900 Model also increased moving costs of QImode registers in non
14904 ix86_memory_move_cost (mode, class, in)
14905 enum machine_mode mode;
14906 enum reg_class class;
14909 if (FLOAT_CLASS_P (class))
14927 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14929 if (SSE_CLASS_P (class))
14932 switch (GET_MODE_SIZE (mode))
14946 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14948 if (MMX_CLASS_P (class))
14951 switch (GET_MODE_SIZE (mode))
14962 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14964 switch (GET_MODE_SIZE (mode))
14968 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14969 : ix86_cost->movzbl_load);
14971 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14972 : ix86_cost->int_store[0] + 4);
14975 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14977 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14978 if (mode == TFmode)
14980 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14981 * (((int) GET_MODE_SIZE (mode)
14982 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
14986 /* Compute a (partial) cost for rtx X. Return true if the complete
14987 cost has been computed, and false if subexpressions should be
14988 scanned. In either case, *TOTAL contains the cost result. */
14991 ix86_rtx_costs (x, code, outer_code, total)
14993 int code, outer_code;
14996 enum machine_mode mode = GET_MODE (x);
15004 if (TARGET_64BIT && !x86_64_sign_extended_value (x))
15006 else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
15008 else if (flag_pic && SYMBOLIC_CONST (x)
15010 || (!GET_CODE (x) != LABEL_REF
15011 && (GET_CODE (x) != SYMBOL_REF
15012 || !SYMBOL_REF_LOCAL_P (x)))))
15019 if (mode == VOIDmode)
15022 switch (standard_80387_constant_p (x))
15027 default: /* Other constants */
15032 /* Start with (MEM (SYMBOL_REF)), since that's where
15033 it'll probably end up. Add a penalty for size. */
15034 *total = (COSTS_N_INSNS (1)
15035 + (flag_pic != 0 && !TARGET_64BIT)
15036 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
15042 /* The zero extensions is often completely free on x86_64, so make
15043 it as cheap as possible. */
15044 if (TARGET_64BIT && mode == DImode
15045 && GET_MODE (XEXP (x, 0)) == SImode)
15047 else if (TARGET_ZERO_EXTEND_WITH_AND)
15048 *total = COSTS_N_INSNS (ix86_cost->add);
15050 *total = COSTS_N_INSNS (ix86_cost->movzx);
15054 *total = COSTS_N_INSNS (ix86_cost->movsx);
15058 if (GET_CODE (XEXP (x, 1)) == CONST_INT
15059 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
15061 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15064 *total = COSTS_N_INSNS (ix86_cost->add);
15067 if ((value == 2 || value == 3)
15068 && !TARGET_DECOMPOSE_LEA
15069 && ix86_cost->lea <= ix86_cost->shift_const)
15071 *total = COSTS_N_INSNS (ix86_cost->lea);
15081 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
15083 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15085 if (INTVAL (XEXP (x, 1)) > 32)
15086 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
15088 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
15092 if (GET_CODE (XEXP (x, 1)) == AND)
15093 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
15095 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
15100 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15101 *total = COSTS_N_INSNS (ix86_cost->shift_const);
15103 *total = COSTS_N_INSNS (ix86_cost->shift_var);
15108 if (FLOAT_MODE_P (mode))
15109 *total = COSTS_N_INSNS (ix86_cost->fmul);
15110 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15112 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15115 for (nbits = 0; value != 0; value >>= 1)
15118 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15119 + nbits * ix86_cost->mult_bit);
15123 /* This is arbitrary */
15124 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15125 + 7 * ix86_cost->mult_bit);
15133 if (FLOAT_MODE_P (mode))
15134 *total = COSTS_N_INSNS (ix86_cost->fdiv);
15136 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
15140 if (FLOAT_MODE_P (mode))
15141 *total = COSTS_N_INSNS (ix86_cost->fadd);
15142 else if (!TARGET_DECOMPOSE_LEA
15143 && GET_MODE_CLASS (mode) == MODE_INT
15144 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
15146 if (GET_CODE (XEXP (x, 0)) == PLUS
15147 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15148 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
15149 && CONSTANT_P (XEXP (x, 1)))
15151 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
15152 if (val == 2 || val == 4 || val == 8)
15154 *total = COSTS_N_INSNS (ix86_cost->lea);
15155 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15156 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
15158 *total += rtx_cost (XEXP (x, 1), outer_code);
15162 else if (GET_CODE (XEXP (x, 0)) == MULT
15163 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
15165 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
15166 if (val == 2 || val == 4 || val == 8)
15168 *total = COSTS_N_INSNS (ix86_cost->lea);
15169 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15170 *total += rtx_cost (XEXP (x, 1), outer_code);
15174 else if (GET_CODE (XEXP (x, 0)) == PLUS)
15176 *total = COSTS_N_INSNS (ix86_cost->lea);
15177 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15178 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15179 *total += rtx_cost (XEXP (x, 1), outer_code);
15186 if (FLOAT_MODE_P (mode))
15188 *total = COSTS_N_INSNS (ix86_cost->fadd);
15196 if (!TARGET_64BIT && mode == DImode)
15198 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
15199 + (rtx_cost (XEXP (x, 0), outer_code)
15200 << (GET_MODE (XEXP (x, 0)) != DImode))
15201 + (rtx_cost (XEXP (x, 1), outer_code)
15202 << (GET_MODE (XEXP (x, 1)) != DImode)));
15208 if (FLOAT_MODE_P (mode))
15210 *total = COSTS_N_INSNS (ix86_cost->fchs);
15216 if (!TARGET_64BIT && mode == DImode)
15217 *total = COSTS_N_INSNS (ix86_cost->add * 2);
15219 *total = COSTS_N_INSNS (ix86_cost->add);
15223 if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
15228 if (FLOAT_MODE_P (mode))
15229 *total = COSTS_N_INSNS (ix86_cost->fabs);
15233 if (FLOAT_MODE_P (mode))
15234 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
15238 if (XINT (x, 1) == UNSPEC_TP)
15247 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
15249 ix86_svr3_asm_out_constructor (symbol, priority)
15251 int priority ATTRIBUTE_UNUSED;
15254 fputs ("\tpushl $", asm_out_file);
15255 assemble_name (asm_out_file, XSTR (symbol, 0));
15256 fputc ('\n', asm_out_file);
15262 static int current_machopic_label_num;
15264 /* Given a symbol name and its associated stub, write out the
15265 definition of the stub. */
15268 machopic_output_stub (file, symb, stub)
15270 const char *symb, *stub;
15272 unsigned int length;
15273 char *binder_name, *symbol_name, lazy_ptr_name[32];
15274 int label = ++current_machopic_label_num;
15276 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
15277 symb = (*targetm.strip_name_encoding) (symb);
15279 length = strlen (stub);
15280 binder_name = alloca (length + 32);
15281 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
15283 length = strlen (symb);
15284 symbol_name = alloca (length + 32);
15285 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
15287 sprintf (lazy_ptr_name, "L%d$lz", label);
15290 machopic_picsymbol_stub_section ();
15292 machopic_symbol_stub_section ();
15294 fprintf (file, "%s:\n", stub);
15295 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15299 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
15300 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
15301 fprintf (file, "\tjmp %%edx\n");
15304 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
15306 fprintf (file, "%s:\n", binder_name);
15310 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
15311 fprintf (file, "\tpushl %%eax\n");
15314 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
15316 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
15318 machopic_lazy_symbol_ptr_section ();
15319 fprintf (file, "%s:\n", lazy_ptr_name);
15320 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15321 fprintf (file, "\t.long %s\n", binder_name);
15323 #endif /* TARGET_MACHO */
15325 /* Order the registers for register allocator. */
15328 x86_order_regs_for_local_alloc ()
15333 /* First allocate the local general purpose registers. */
15334 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15335 if (GENERAL_REGNO_P (i) && call_used_regs[i])
15336 reg_alloc_order [pos++] = i;
15338 /* Global general purpose registers. */
15339 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15340 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
15341 reg_alloc_order [pos++] = i;
15343 /* x87 registers come first in case we are doing FP math
15345 if (!TARGET_SSE_MATH)
15346 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15347 reg_alloc_order [pos++] = i;
15349 /* SSE registers. */
15350 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
15351 reg_alloc_order [pos++] = i;
15352 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
15353 reg_alloc_order [pos++] = i;
15355 /* x87 registers. */
15356 if (TARGET_SSE_MATH)
15357 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15358 reg_alloc_order [pos++] = i;
15360 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
15361 reg_alloc_order [pos++] = i;
15363 /* Initialize the rest of array as we do not allocate some registers
15365 while (pos < FIRST_PSEUDO_REGISTER)
15366 reg_alloc_order [pos++] = 0;
15369 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15370 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
15373 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15374 struct attribute_spec.handler. */
15376 ix86_handle_struct_attribute (node, name, args, flags, no_add_attrs)
15379 tree args ATTRIBUTE_UNUSED;
15380 int flags ATTRIBUTE_UNUSED;
15381 bool *no_add_attrs;
15384 if (DECL_P (*node))
15386 if (TREE_CODE (*node) == TYPE_DECL)
15387 type = &TREE_TYPE (*node);
15392 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15393 || TREE_CODE (*type) == UNION_TYPE)))
15395 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
15396 *no_add_attrs = true;
15399 else if ((is_attribute_p ("ms_struct", name)
15400 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15401 || ((is_attribute_p ("gcc_struct", name)
15402 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15404 warning ("`%s' incompatible attribute ignored",
15405 IDENTIFIER_POINTER (name));
15406 *no_add_attrs = true;
15413 ix86_ms_bitfield_layout_p (record_type)
15416 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
15417 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
15418 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
15421 /* Returns an expression indicating where the this parameter is
15422 located on entry to the FUNCTION. */
15425 x86_this_parameter (function)
15428 tree type = TREE_TYPE (function);
15432 int n = aggregate_value_p (TREE_TYPE (type)) != 0;
15433 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
15436 if (ix86_fntype_regparm (type) > 0)
15440 parm = TYPE_ARG_TYPES (type);
15441 /* Figure out whether or not the function has a variable number of
15443 for (; parm; parm = TREE_CHAIN (parm))
15444 if (TREE_VALUE (parm) == void_type_node)
15446 /* If not, the this parameter is in %eax. */
15448 return gen_rtx_REG (SImode, 0);
15451 if (aggregate_value_p (TREE_TYPE (type)))
15452 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
15454 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
15457 /* Determine whether x86_output_mi_thunk can succeed. */
15460 x86_can_output_mi_thunk (thunk, delta, vcall_offset, function)
15461 tree thunk ATTRIBUTE_UNUSED;
15462 HOST_WIDE_INT delta ATTRIBUTE_UNUSED;
15463 HOST_WIDE_INT vcall_offset;
15466 /* 64-bit can handle anything. */
15470 /* For 32-bit, everything's fine if we have one free register. */
15471 if (ix86_fntype_regparm (TREE_TYPE (function)) < 3)
15474 /* Need a free register for vcall_offset. */
15478 /* Need a free register for GOT references. */
15479 if (flag_pic && !(*targetm.binds_local_p) (function))
15482 /* Otherwise ok. */
15486 /* Output the assembler code for a thunk function. THUNK_DECL is the
15487 declaration for the thunk function itself, FUNCTION is the decl for
15488 the target function. DELTA is an immediate constant offset to be
15489 added to THIS. If VCALL_OFFSET is nonzero, the word at
15490 *(*this + vcall_offset) should be added to THIS. */
15493 x86_output_mi_thunk (file, thunk, delta, vcall_offset, function)
15494 FILE *file ATTRIBUTE_UNUSED;
15495 tree thunk ATTRIBUTE_UNUSED;
15496 HOST_WIDE_INT delta;
15497 HOST_WIDE_INT vcall_offset;
15501 rtx this = x86_this_parameter (function);
15504 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15505 pull it in now and let DELTA benefit. */
15508 else if (vcall_offset)
15510 /* Put the this parameter into %eax. */
15512 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15513 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15516 this_reg = NULL_RTX;
15518 /* Adjust the this parameter by a fixed constant. */
15521 xops[0] = GEN_INT (delta);
15522 xops[1] = this_reg ? this_reg : this;
15525 if (!x86_64_general_operand (xops[0], DImode))
15527 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15529 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15533 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15536 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15539 /* Adjust the this parameter by a value stored in the vtable. */
15543 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15545 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15547 xops[0] = gen_rtx_MEM (Pmode, this_reg);
15550 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15552 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15554 /* Adjust the this parameter. */
15555 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
15556 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15558 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15559 xops[0] = GEN_INT (vcall_offset);
15561 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15562 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
15564 xops[1] = this_reg;
15566 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15568 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15571 /* If necessary, drop THIS back to its stack slot. */
15572 if (this_reg && this_reg != this)
15574 xops[0] = this_reg;
15576 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15579 xops[0] = XEXP (DECL_RTL (function), 0);
15582 if (!flag_pic || (*targetm.binds_local_p) (function))
15583 output_asm_insn ("jmp\t%P0", xops);
15586 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
15587 tmp = gen_rtx_CONST (Pmode, tmp);
15588 tmp = gen_rtx_MEM (QImode, tmp);
15590 output_asm_insn ("jmp\t%A0", xops);
15595 if (!flag_pic || (*targetm.binds_local_p) (function))
15596 output_asm_insn ("jmp\t%P0", xops);
15601 char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
15602 tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
15603 tmp = gen_rtx_MEM (QImode, tmp);
15605 output_asm_insn ("jmp\t%0", xops);
15608 #endif /* TARGET_MACHO */
15610 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15611 output_set_got (tmp);
15614 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15615 output_asm_insn ("jmp\t{*}%1", xops);
15623 default_file_start ();
15624 if (X86_FILE_START_VERSION_DIRECTIVE)
15625 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
15626 if (X86_FILE_START_FLTUSED)
15627 fputs ("\t.global\t__fltused\n", asm_out_file);
15628 if (ix86_asm_dialect == ASM_INTEL)
15629 fputs ("\t.intel_syntax\n", asm_out_file);
15633 x86_field_alignment (field, computed)
15637 enum machine_mode mode;
15638 tree type = TREE_TYPE (field);
15640 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
15642 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15643 ? get_inner_array_type (type) : type);
15644 if (mode == DFmode || mode == DCmode
15645 || GET_MODE_CLASS (mode) == MODE_INT
15646 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
15647 return MIN (32, computed);
15651 /* Output assembler code to FILE to increment profiler label # LABELNO
15652 for profiling a function entry. */
15654 x86_function_profiler (file, labelno)
15656 int labelno ATTRIBUTE_UNUSED;
15661 #ifndef NO_PROFILE_COUNTERS
15662 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
15664 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
15668 #ifndef NO_PROFILE_COUNTERS
15669 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15671 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15675 #ifndef NO_PROFILE_COUNTERS
15676 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15677 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15679 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
15683 #ifndef NO_PROFILE_COUNTERS
15684 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
15685 PROFILE_COUNT_REGISTER);
15687 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15691 /* We don't have exact information about the insn sizes, but we may assume
15692 quite safely that we are informed about all 1 byte insns and memory
15693 address sizes. This is enought to elliminate unnecesary padding in
15697 min_insn_size (insn)
15702 if (!INSN_P (insn) || !active_insn_p (insn))
15705 /* Discard alignments we've emit and jump instructions. */
15706 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
15707 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
15709 if (GET_CODE (insn) == JUMP_INSN
15710 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
15711 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
15714 /* Important case - calls are always 5 bytes.
15715 It is common to have many calls in the row. */
15716 if (GET_CODE (insn) == CALL_INSN
15717 && symbolic_reference_mentioned_p (PATTERN (insn))
15718 && !SIBLING_CALL_P (insn))
15720 if (get_attr_length (insn) <= 1)
15723 /* For normal instructions we may rely on the sizes of addresses
15724 and the presence of symbol to require 4 bytes of encoding.
15725 This is not the case for jumps where references are PC relative. */
15726 if (GET_CODE (insn) != JUMP_INSN)
15728 l = get_attr_length_address (insn);
15729 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
15738 /* AMD K8 core misspredicts jumps when there are more than 3 jumps in 16 byte
15742 k8_avoid_jump_misspredicts ()
15744 rtx insn, start = get_insns ();
15745 int nbytes = 0, njumps = 0;
15748 /* Look for all minimal intervals of instructions containing 4 jumps.
15749 The intervals are bounded by START and INSN. NBYTES is the total
15750 size of instructions in the interval including INSN and not including
15751 START. When the NBYTES is smaller than 16 bytes, it is possible
15752 that the end of START and INSN ends up in the same 16byte page.
15754 The smallest offset in the page INSN can start is the case where START
15755 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
15756 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
15758 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
15761 nbytes += min_insn_size (insn);
15763 fprintf(rtl_dump_file, "Insn %i estimated to %i bytes\n",
15764 INSN_UID (insn), min_insn_size (insn));
15765 if ((GET_CODE (insn) == JUMP_INSN
15766 && GET_CODE (PATTERN (insn)) != ADDR_VEC
15767 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
15768 || GET_CODE (insn) == CALL_INSN)
15775 start = NEXT_INSN (start);
15776 if ((GET_CODE (start) == JUMP_INSN
15777 && GET_CODE (PATTERN (start)) != ADDR_VEC
15778 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
15779 || GET_CODE (start) == CALL_INSN)
15780 njumps--, isjump = 1;
15783 nbytes -= min_insn_size (start);
15788 fprintf(rtl_dump_file, "Interval %i to %i has %i bytes\n",
15789 INSN_UID (start), INSN_UID (insn), nbytes);
15791 if (njumps == 3 && isjump && nbytes < 16)
15793 int padsize = 15 - nbytes + min_insn_size (insn);
15796 fprintf (rtl_dump_file, "Padding insn %i by %i bytes!\n", INSN_UID (insn), padsize);
15797 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
15802 /* Implement machine specific optimizations.
15803 At the moment we implement single transformation: AMD Athlon works faster
15804 when RET is not destination of conditional jump or directly preceded
15805 by other jump instruction. We avoid the penalty by inserting NOP just
15806 before the RET instructions in such cases. */
15812 if (!TARGET_ATHLON_K8 || !optimize || optimize_size)
15814 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
15816 basic_block bb = e->src;
15819 bool replace = false;
15821 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
15822 || !maybe_hot_bb_p (bb))
15824 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15825 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
15827 if (prev && GET_CODE (prev) == CODE_LABEL)
15830 for (e = bb->pred; e; e = e->pred_next)
15831 if (EDGE_FREQUENCY (e) && e->src->index >= 0
15832 && !(e->flags & EDGE_FALLTHRU))
15837 prev = prev_active_insn (ret);
15839 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
15840 || GET_CODE (prev) == CALL_INSN))
15842 /* Empty functions get branch misspredict even when the jump destination
15843 is not visible to us. */
15844 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
15849 emit_insn_before (gen_return_internal_long (), ret);
15853 k8_avoid_jump_misspredicts ();
15856 /* Return nonzero when QImode register that must be represented via REX prefix
15859 x86_extended_QIreg_mentioned_p (insn)
15863 extract_insn_cached (insn);
15864 for (i = 0; i < recog_data.n_operands; i++)
15865 if (REG_P (recog_data.operand[i])
15866 && REGNO (recog_data.operand[i]) >= 4)
15871 /* Return nonzero when P points to register encoded via REX prefix.
15872 Called via for_each_rtx. */
15874 extended_reg_mentioned_1 (p, data)
15876 void *data ATTRIBUTE_UNUSED;
15878 unsigned int regno;
15881 regno = REGNO (*p);
15882 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15885 /* Return true when INSN mentions register that must be encoded using REX
15888 x86_extended_reg_mentioned_p (insn)
15891 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15894 /* Generate an unsigned DImode to FP conversion. This is the same code
15895 optabs would emit if we didn't have TFmode patterns. */
15898 x86_emit_floatuns (operands)
15901 rtx neglab, donelab, i0, i1, f0, in, out;
15902 enum machine_mode mode;
15905 in = force_reg (DImode, operands[1]);
15906 mode = GET_MODE (out);
15907 neglab = gen_label_rtx ();
15908 donelab = gen_label_rtx ();
15909 i1 = gen_reg_rtx (Pmode);
15910 f0 = gen_reg_rtx (mode);
15912 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
15914 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15915 emit_jump_insn (gen_jump (donelab));
15918 emit_label (neglab);
15920 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15921 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15922 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15923 expand_float (f0, i0, 0);
15924 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15926 emit_label (donelab);
15929 /* Return if we do not know how to pass TYPE solely in registers. */
15931 ix86_must_pass_in_stack (mode, type)
15932 enum machine_mode mode;
15935 if (default_must_pass_in_stack (mode, type))
15937 return (!TARGET_64BIT && type && mode == TImode);
15940 #include "gt-i386.h"