1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-attr.h"
43 #include "basic-block.h"
46 #include "target-def.h"
47 #include "langhooks.h"
49 #ifndef CHECK_STACK_LIMIT
50 #define CHECK_STACK_LIMIT (-1)
53 /* Return index of given mode in mult and division cost tables. */
54 #define MODE_INDEX(mode) \
55 ((mode) == QImode ? 0 \
56 : (mode) == HImode ? 1 \
57 : (mode) == SImode ? 2 \
58 : (mode) == DImode ? 3 \
61 /* Processor costs (relative to an add) */
struct processor_costs size_cost = {	/* costs for tuning for size */
64 2, /* cost of an add instruction */
65 3, /* cost of a lea instruction */
66 2, /* variable shift costs */
67 3, /* constant shift costs */
68 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
69 0, /* cost of multiply per each bit set */
70 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
71 3, /* cost of movsx */
72 3, /* cost of movzx */
75 2, /* cost for loading QImode using movzbl */
76 {2, 2, 2}, /* cost of loading integer registers
77 in QImode, HImode and SImode.
78 Relative to reg-reg move (2). */
79 {2, 2, 2}, /* cost of storing integer registers */
80 2, /* cost of reg,reg fld/fst */
81 {2, 2, 2}, /* cost of loading fp registers
82 in SFmode, DFmode and XFmode */
83 {2, 2, 2}, /* cost of loading integer registers */
84 3, /* cost of moving MMX register */
85 {3, 3}, /* cost of loading MMX registers
86 in SImode and DImode */
87 {3, 3}, /* cost of storing MMX registers
88 in SImode and DImode */
89 3, /* cost of moving SSE register */
90 {3, 3, 3}, /* cost of loading SSE registers
91 in SImode, DImode and TImode */
92 {3, 3, 3}, /* cost of storing SSE registers
93 in SImode, DImode and TImode */
94 3, /* MMX or SSE register to integer */
95 0, /* size of prefetch block */
96 0, /* number of parallel prefetches */
98 2, /* cost of FADD and FSUB insns. */
99 2, /* cost of FMUL instruction. */
100 2, /* cost of FDIV instruction. */
101 2, /* cost of FABS instruction. */
102 2, /* cost of FCHS instruction. */
103 2, /* cost of FSQRT instruction. */
106 /* Processor costs (relative to an add) */
108 struct processor_costs i386_cost = { /* 386 specific costs */
109 1, /* cost of an add instruction */
110 1, /* cost of a lea instruction */
111 3, /* variable shift costs */
112 2, /* constant shift costs */
113 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
114 1, /* cost of multiply per each bit set */
115 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
116 3, /* cost of movsx */
117 2, /* cost of movzx */
118 15, /* "large" insn */
120 4, /* cost for loading QImode using movzbl */
121 {2, 4, 2}, /* cost of loading integer registers
122 in QImode, HImode and SImode.
123 Relative to reg-reg move (2). */
124 {2, 4, 2}, /* cost of storing integer registers */
125 2, /* cost of reg,reg fld/fst */
126 {8, 8, 8}, /* cost of loading fp registers
127 in SFmode, DFmode and XFmode */
128 {8, 8, 8}, /* cost of loading integer registers */
129 2, /* cost of moving MMX register */
130 {4, 8}, /* cost of loading MMX registers
131 in SImode and DImode */
132 {4, 8}, /* cost of storing MMX registers
133 in SImode and DImode */
134 2, /* cost of moving SSE register */
135 {4, 8, 16}, /* cost of loading SSE registers
136 in SImode, DImode and TImode */
137 {4, 8, 16}, /* cost of storing SSE registers
138 in SImode, DImode and TImode */
139 3, /* MMX or SSE register to integer */
140 0, /* size of prefetch block */
141 0, /* number of parallel prefetches */
143 23, /* cost of FADD and FSUB insns. */
144 27, /* cost of FMUL instruction. */
145 88, /* cost of FDIV instruction. */
146 22, /* cost of FABS instruction. */
147 24, /* cost of FCHS instruction. */
148 122, /* cost of FSQRT instruction. */
152 struct processor_costs i486_cost = { /* 486 specific costs */
153 1, /* cost of an add instruction */
154 1, /* cost of a lea instruction */
155 3, /* variable shift costs */
156 2, /* constant shift costs */
157 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
158 1, /* cost of multiply per each bit set */
159 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
160 3, /* cost of movsx */
161 2, /* cost of movzx */
162 15, /* "large" insn */
164 4, /* cost for loading QImode using movzbl */
165 {2, 4, 2}, /* cost of loading integer registers
166 in QImode, HImode and SImode.
167 Relative to reg-reg move (2). */
168 {2, 4, 2}, /* cost of storing integer registers */
169 2, /* cost of reg,reg fld/fst */
170 {8, 8, 8}, /* cost of loading fp registers
171 in SFmode, DFmode and XFmode */
172 {8, 8, 8}, /* cost of loading integer registers */
173 2, /* cost of moving MMX register */
174 {4, 8}, /* cost of loading MMX registers
175 in SImode and DImode */
176 {4, 8}, /* cost of storing MMX registers
177 in SImode and DImode */
178 2, /* cost of moving SSE register */
179 {4, 8, 16}, /* cost of loading SSE registers
180 in SImode, DImode and TImode */
181 {4, 8, 16}, /* cost of storing SSE registers
182 in SImode, DImode and TImode */
183 3, /* MMX or SSE register to integer */
184 0, /* size of prefetch block */
185 0, /* number of parallel prefetches */
187 8, /* cost of FADD and FSUB insns. */
188 16, /* cost of FMUL instruction. */
189 73, /* cost of FDIV instruction. */
190 3, /* cost of FABS instruction. */
191 3, /* cost of FCHS instruction. */
192 83, /* cost of FSQRT instruction. */
196 struct processor_costs pentium_cost = {
197 1, /* cost of an add instruction */
198 1, /* cost of a lea instruction */
199 4, /* variable shift costs */
200 1, /* constant shift costs */
201 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
202 0, /* cost of multiply per each bit set */
203 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
204 3, /* cost of movsx */
205 2, /* cost of movzx */
206 8, /* "large" insn */
208 6, /* cost for loading QImode using movzbl */
209 {2, 4, 2}, /* cost of loading integer registers
210 in QImode, HImode and SImode.
211 Relative to reg-reg move (2). */
212 {2, 4, 2}, /* cost of storing integer registers */
213 2, /* cost of reg,reg fld/fst */
214 {2, 2, 6}, /* cost of loading fp registers
215 in SFmode, DFmode and XFmode */
216 {4, 4, 6}, /* cost of loading integer registers */
217 8, /* cost of moving MMX register */
218 {8, 8}, /* cost of loading MMX registers
219 in SImode and DImode */
220 {8, 8}, /* cost of storing MMX registers
221 in SImode and DImode */
222 2, /* cost of moving SSE register */
223 {4, 8, 16}, /* cost of loading SSE registers
224 in SImode, DImode and TImode */
225 {4, 8, 16}, /* cost of storing SSE registers
226 in SImode, DImode and TImode */
227 3, /* MMX or SSE register to integer */
228 0, /* size of prefetch block */
229 0, /* number of parallel prefetches */
231 3, /* cost of FADD and FSUB insns. */
232 3, /* cost of FMUL instruction. */
233 39, /* cost of FDIV instruction. */
234 1, /* cost of FABS instruction. */
235 1, /* cost of FCHS instruction. */
236 70, /* cost of FSQRT instruction. */
240 struct processor_costs pentiumpro_cost = {
241 1, /* cost of an add instruction */
242 1, /* cost of a lea instruction */
243 1, /* variable shift costs */
244 1, /* constant shift costs */
245 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
246 0, /* cost of multiply per each bit set */
247 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
248 1, /* cost of movsx */
249 1, /* cost of movzx */
250 8, /* "large" insn */
252 2, /* cost for loading QImode using movzbl */
253 {4, 4, 4}, /* cost of loading integer registers
254 in QImode, HImode and SImode.
255 Relative to reg-reg move (2). */
256 {2, 2, 2}, /* cost of storing integer registers */
257 2, /* cost of reg,reg fld/fst */
258 {2, 2, 6}, /* cost of loading fp registers
259 in SFmode, DFmode and XFmode */
260 {4, 4, 6}, /* cost of loading integer registers */
261 2, /* cost of moving MMX register */
262 {2, 2}, /* cost of loading MMX registers
263 in SImode and DImode */
264 {2, 2}, /* cost of storing MMX registers
265 in SImode and DImode */
266 2, /* cost of moving SSE register */
267 {2, 2, 8}, /* cost of loading SSE registers
268 in SImode, DImode and TImode */
269 {2, 2, 8}, /* cost of storing SSE registers
270 in SImode, DImode and TImode */
271 3, /* MMX or SSE register to integer */
272 32, /* size of prefetch block */
273 6, /* number of parallel prefetches */
275 3, /* cost of FADD and FSUB insns. */
276 5, /* cost of FMUL instruction. */
277 56, /* cost of FDIV instruction. */
278 2, /* cost of FABS instruction. */
279 2, /* cost of FCHS instruction. */
280 56, /* cost of FSQRT instruction. */
284 struct processor_costs k6_cost = {
285 1, /* cost of an add instruction */
286 2, /* cost of a lea instruction */
287 1, /* variable shift costs */
288 1, /* constant shift costs */
289 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
290 0, /* cost of multiply per each bit set */
291 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
292 2, /* cost of movsx */
293 2, /* cost of movzx */
294 8, /* "large" insn */
296 3, /* cost for loading QImode using movzbl */
297 {4, 5, 4}, /* cost of loading integer registers
298 in QImode, HImode and SImode.
299 Relative to reg-reg move (2). */
300 {2, 3, 2}, /* cost of storing integer registers */
301 4, /* cost of reg,reg fld/fst */
302 {6, 6, 6}, /* cost of loading fp registers
303 in SFmode, DFmode and XFmode */
304 {4, 4, 4}, /* cost of loading integer registers */
305 2, /* cost of moving MMX register */
306 {2, 2}, /* cost of loading MMX registers
307 in SImode and DImode */
308 {2, 2}, /* cost of storing MMX registers
309 in SImode and DImode */
310 2, /* cost of moving SSE register */
311 {2, 2, 8}, /* cost of loading SSE registers
312 in SImode, DImode and TImode */
313 {2, 2, 8}, /* cost of storing SSE registers
314 in SImode, DImode and TImode */
315 6, /* MMX or SSE register to integer */
316 32, /* size of prefetch block */
317 1, /* number of parallel prefetches */
319 2, /* cost of FADD and FSUB insns. */
320 2, /* cost of FMUL instruction. */
321 56, /* cost of FDIV instruction. */
322 2, /* cost of FABS instruction. */
323 2, /* cost of FCHS instruction. */
324 56, /* cost of FSQRT instruction. */
328 struct processor_costs athlon_cost = {
329 1, /* cost of an add instruction */
330 2, /* cost of a lea instruction */
331 1, /* variable shift costs */
332 1, /* constant shift costs */
333 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
334 0, /* cost of multiply per each bit set */
335 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
336 1, /* cost of movsx */
337 1, /* cost of movzx */
338 8, /* "large" insn */
340 4, /* cost for loading QImode using movzbl */
341 {3, 4, 3}, /* cost of loading integer registers
342 in QImode, HImode and SImode.
343 Relative to reg-reg move (2). */
344 {3, 4, 3}, /* cost of storing integer registers */
345 4, /* cost of reg,reg fld/fst */
346 {4, 4, 12}, /* cost of loading fp registers
347 in SFmode, DFmode and XFmode */
348 {6, 6, 8}, /* cost of loading integer registers */
349 2, /* cost of moving MMX register */
350 {4, 4}, /* cost of loading MMX registers
351 in SImode and DImode */
352 {4, 4}, /* cost of storing MMX registers
353 in SImode and DImode */
354 2, /* cost of moving SSE register */
355 {4, 4, 6}, /* cost of loading SSE registers
356 in SImode, DImode and TImode */
357 {4, 4, 5}, /* cost of storing SSE registers
358 in SImode, DImode and TImode */
359 5, /* MMX or SSE register to integer */
360 64, /* size of prefetch block */
361 6, /* number of parallel prefetches */
363 4, /* cost of FADD and FSUB insns. */
364 4, /* cost of FMUL instruction. */
365 24, /* cost of FDIV instruction. */
366 2, /* cost of FABS instruction. */
367 2, /* cost of FCHS instruction. */
368 35, /* cost of FSQRT instruction. */
372 struct processor_costs k8_cost = {
373 1, /* cost of an add instruction */
374 2, /* cost of a lea instruction */
375 1, /* variable shift costs */
376 1, /* constant shift costs */
377 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
378 0, /* cost of multiply per each bit set */
379 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
380 1, /* cost of movsx */
381 1, /* cost of movzx */
382 8, /* "large" insn */
384 4, /* cost for loading QImode using movzbl */
385 {3, 4, 3}, /* cost of loading integer registers
386 in QImode, HImode and SImode.
387 Relative to reg-reg move (2). */
388 {3, 4, 3}, /* cost of storing integer registers */
389 4, /* cost of reg,reg fld/fst */
390 {4, 4, 12}, /* cost of loading fp registers
391 in SFmode, DFmode and XFmode */
392 {6, 6, 8}, /* cost of loading integer registers */
393 2, /* cost of moving MMX register */
394 {3, 3}, /* cost of loading MMX registers
395 in SImode and DImode */
396 {4, 4}, /* cost of storing MMX registers
397 in SImode and DImode */
398 2, /* cost of moving SSE register */
399 {4, 3, 6}, /* cost of loading SSE registers
400 in SImode, DImode and TImode */
401 {4, 4, 5}, /* cost of storing SSE registers
402 in SImode, DImode and TImode */
403 5, /* MMX or SSE register to integer */
404 64, /* size of prefetch block */
405 6, /* number of parallel prefetches */
407 4, /* cost of FADD and FSUB insns. */
408 4, /* cost of FMUL instruction. */
409 19, /* cost of FDIV instruction. */
410 2, /* cost of FABS instruction. */
411 2, /* cost of FCHS instruction. */
412 35, /* cost of FSQRT instruction. */
416 struct processor_costs pentium4_cost = {
417 1, /* cost of an add instruction */
418 1, /* cost of a lea instruction */
419 4, /* variable shift costs */
420 4, /* constant shift costs */
421 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
422 0, /* cost of multiply per each bit set */
423 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
424 1, /* cost of movsx */
425 1, /* cost of movzx */
426 16, /* "large" insn */
428 2, /* cost for loading QImode using movzbl */
429 {4, 5, 4}, /* cost of loading integer registers
430 in QImode, HImode and SImode.
431 Relative to reg-reg move (2). */
432 {2, 3, 2}, /* cost of storing integer registers */
433 2, /* cost of reg,reg fld/fst */
434 {2, 2, 6}, /* cost of loading fp registers
435 in SFmode, DFmode and XFmode */
436 {4, 4, 6}, /* cost of loading integer registers */
437 2, /* cost of moving MMX register */
438 {2, 2}, /* cost of loading MMX registers
439 in SImode and DImode */
440 {2, 2}, /* cost of storing MMX registers
441 in SImode and DImode */
442 12, /* cost of moving SSE register */
443 {12, 12, 12}, /* cost of loading SSE registers
444 in SImode, DImode and TImode */
445 {2, 2, 8}, /* cost of storing SSE registers
446 in SImode, DImode and TImode */
447 10, /* MMX or SSE register to integer */
448 64, /* size of prefetch block */
449 6, /* number of parallel prefetches */
451 5, /* cost of FADD and FSUB insns. */
452 7, /* cost of FMUL instruction. */
453 43, /* cost of FDIV instruction. */
454 2, /* cost of FABS instruction. */
455 2, /* cost of FCHS instruction. */
456 43, /* cost of FSQRT instruction. */
/* Cost table currently in effect; points at one of the processor_costs
   tables above.  Defaults to Pentium tuning -- presumably repointed when
   the -mcpu/-mtune options are processed (not visible in this chunk;
   confirm against the options-handling code).  */
const struct processor_costs *ix86_cost = &pentium_cost;
/* Processor feature/optimization bitmasks.  Each m_* mask has the bit
   for the corresponding PROCESSOR_* enumeration value set; the x86_*
   tuning flags below are unions of these masks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_K8 (1<<PROCESSOR_K8)
/* Athlon and K8 share most tuning decisions.  */
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
/* Per-processor tuning flags.  Each constant is a bitmask of the m_*
   masks above: a set bit means the corresponding heuristic is enabled
   when tuning for that processor.  A leading ~ enables the heuristic
   for every processor except those masked out.  */
const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
const int x86_3dnow_a = m_ATHLON_K8;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
/* Branch hints were put in P4 based on simulation result. But
   after P4 was made, no performance benefit was observed with
   branch hints -- NOTE(review): rationale assumed, confirm.  */
const int x86_branch_hints = m_PENT4;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
const int x86_single_stringop = m_386 | m_PENT4;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
/* Stack adjustment preferences: whether sub/add esp,N beats
   push/pop sequences on the given processors.  */
const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
const int x86_decompose_lea = m_PENT4;
const int x86_shift1 = ~m_486;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
/* Set for machines where the type and dependencies are resolved on SSE register
   parts instead of whole registers, so we may maintain just lower part of
   scalar values in proper format leaving the upper part undefined.  */
const int x86_sse_partial_regs = m_ATHLON_K8;
/* Athlon optimizes partial-register FPS special case, thus avoiding the
   need for extra instructions beforehand.  */
const int x86_sse_partial_regs_for_cvtsd2ss = 0;
const int x86_sse_typeless_stores = m_ATHLON_K8;
const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
const int x86_use_ffreep = m_ATHLON_K8;
const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20
/* Set by prologue expander and used by epilogue expander to determine
   which prologue/epilogue style was chosen (see
   FAST_PROLOGUE_INSN_COUNT above).  */
static int use_fast_prologue_epilogue;
/* Names for 8 (low), 8 (high), and 16-bit registers, respectively,
   indexed by hard register number.  The *_REGISTER_NAMES macros are
   defined elsewhere (presumably i386.h -- not visible in this chunk).  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
539 /* Array of the smallest class containing reg number REGNO, indexed by
540 REGNO. Used by REGNO_REG_CLASS in i386.h. */
542 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
545 AREG, DREG, CREG, BREG,
547 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
549 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
550 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
553 /* flags, fpsr, dirflag, frame */
554 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
555 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
557 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
559 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
560 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
561 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
565 /* The "default" register map used in 32bit mode. */
567 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
569 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
570 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
571 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
572 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
573 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
574 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
575 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
/* GCC register numbers of the registers used to pass integer arguments
   in the x86-64 ABI, in argument-passing order (RDI, RSI, RDX, RCX,
   R8, R9).  GCC regnos: 1 = dx, 2 = cx, 4 = si, 5 = di -- see the
   dbx register map comments below.  */
static int const x86_64_int_parameter_registers[6] =
  5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
/* GCC register numbers of the registers used to return integer values
   in the x86-64 ABI.  Note: gcc regno 1 is %rdx (not %rdi) -- see the
   dbx register map comments below, which give regno 1 = %edx,
   regno 5 = %edi, regno 4 = %esi.  */
static int const x86_64_int_return_registers[4] =
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
589 /* The "default" register map used in 64bit mode. */
590 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
592 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
593 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
594 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
595 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
596 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
597 8,9,10,11,12,13,14,15, /* extended integer registers */
598 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
601 /* Define the register numbers to be used in Dwarf debugging information.
602 The SVR4 reference port C compiler uses the following register numbers
603 in its Dwarf output code:
604 0 for %eax (gcc regno = 0)
605 1 for %ecx (gcc regno = 2)
606 2 for %edx (gcc regno = 1)
607 3 for %ebx (gcc regno = 3)
608 4 for %esp (gcc regno = 7)
609 5 for %ebp (gcc regno = 6)
610 6 for %esi (gcc regno = 4)
611 7 for %edi (gcc regno = 5)
612 The following three DWARF register numbers are never generated by
613 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
614 believes these numbers have these meanings.
615 8 for %eip (no gcc equivalent)
616 9 for %eflags (gcc regno = 17)
617 10 for %trapno (no gcc equivalent)
618 It is not at all clear how we should number the FP stack registers
619 for the x86 architecture. If the version of SDB on x86/svr4 were
620 a bit less brain dead with respect to floating-point then we would
621 have a precedent to follow with respect to DWARF register numbers
622 for x86 FP registers, but the SDB on x86/svr4 is so completely
623 broken with respect to FP registers that it is hardly worth thinking
624 of it as something to strive for compatibility with.
625 The version of x86/svr4 SDB I have at the moment does (partially)
626 seem to believe that DWARF register number 11 is associated with
627 the x86 register %st(0), but that's about all. Higher DWARF
628 register numbers don't seem to be associated with anything in
629 particular, and even for DWARF regno 11, SDB only seems to under-
630 stand that it should say that a variable lives in %st(0) (when
631 asked via an `=' command) if we said it was in DWARF regno 11,
632 but SDB still prints garbage when asked for the value of the
633 variable in question (via a `/' command).
634 (Also note that the labels SDB prints for various FP stack regs
635 when doing an `x' command are all wrong.)
636 Note that these problems generally don't affect the native SVR4
637 C compiler because it doesn't allow the use of -O with -g and
638 because when it is *not* optimizing, it allocates a memory
639 location for each floating-point variable, and the memory
640 location is what gets described in the DWARF AT_location
641 attribute for the variable in question.
642 Regardless of the severe mental illness of the x86/svr4 SDB, we
643 do something sensible here and we use the following DWARF
644 register numbers. Note that these are all stack-top-relative
646 11 for %st(0) (gcc regno = 8)
647 12 for %st(1) (gcc regno = 9)
648 13 for %st(2) (gcc regno = 10)
649 14 for %st(3) (gcc regno = 11)
650 15 for %st(4) (gcc regno = 12)
651 16 for %st(5) (gcc regno = 13)
652 17 for %st(6) (gcc regno = 14)
653 18 for %st(7) (gcc regno = 15)
655 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
657 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
658 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
659 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
660 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
661 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
662 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
663 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
666 /* Test and compare insns in i386.md store the information needed to
667 generate branch and scc insns here. */
669 rtx ix86_compare_op0 = NULL_RTX;
670 rtx ix86_compare_op1 = NULL_RTX;
/* The encoding characters for the four TLS models present in ELF.
   NOTE(review): ordering assumed to match the tls_model enumeration,
   with ' ' for "no TLS" -- confirm against the enum definition.  */
static char const tls_model_chars[] = " GLil";

/* Number of scratch stack slots tracked per function, per machine mode
   (see machine_function.stack_locals below).  */
#define MAX_386_STACK_LOCALS 3
/* Size of the x86-64 varargs register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
/* Define the structure for the machine field in struct function.  */
struct machine_function GTY(())
/* Scratch stack-slot rtl, indexed by machine mode and slot number;
   accessed through the ix86_stack_locals macro below.  */
rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
/* NOTE(review): presumably caches the name of some local-dynamic TLS
   symbol for get_some_local_dynamic_name () -- confirm.  */
const char *some_ld_name;
/* Nonzero if the varargs register save area must be set up; accessed
   through ix86_save_varrargs_registers below ("varrargs" spelling is
   historical and used consistently, so it is kept).  */
int save_varrargs_registers;
/* NOTE(review): presumably set when the function accesses its
   caller's frame (e.g. via __builtin_return_address) -- confirm.  */
int accesses_prev_frame;

/* Convenience accessors for the current function's machine field.  */
#define ix86_stack_locals (cfun->machine->stack_locals)
#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
692 /* Structure describing stack frame layout.
693 Stack grows downward:
699 saved frame pointer if frame_pointer_needed
700 <- HARD_FRAME_POINTER
706 > to_allocate <- FRAME_POINTER
718 int outgoing_arguments_size;
721 HOST_WIDE_INT to_allocate;
722 /* The offsets relative to ARG_POINTER. */
723 HOST_WIDE_INT frame_pointer_offset;
724 HOST_WIDE_INT hard_frame_pointer_offset;
725 HOST_WIDE_INT stack_pointer_offset;
728 /* Used to enable/disable debugging features. */
729 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
730 /* Code model option as passed by user. */
731 const char *ix86_cmodel_string;
733 enum cmodel ix86_cmodel;
735 const char *ix86_asm_string;
736 enum asm_dialect ix86_asm_dialect = ASM_ATT;
738 const char *ix86_tls_dialect_string;
739 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
741 /* Which unit we are generating floating point math for. */
742 enum fpmath_unit ix86_fpmath;
744 /* Which cpu are we scheduling for. */
745 enum processor_type ix86_cpu;
746 /* Which instruction set architecture to use. */
747 enum processor_type ix86_arch;
749 /* Strings to hold which cpu and instruction set architecture to use. */
750 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
751 const char *ix86_arch_string; /* for -march=<xxx> */
752 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
754 /* # of registers to use to pass arguments. */
755 const char *ix86_regparm_string;
757 /* true if sse prefetch instruction is not NOOP. */
758 int x86_prefetch_sse;
760 /* ix86_regparm_string as a number */
763 /* Alignment to use for loops and jumps: */
765 /* Power of two alignment for loops. */
766 const char *ix86_align_loops_string;
768 /* Power of two alignment for non-loop jumps. */
769 const char *ix86_align_jumps_string;
771 /* Power of two alignment for stack boundary in bytes. */
772 const char *ix86_preferred_stack_boundary_string;
774 /* Preferred alignment for stack boundary in bits. */
775 int ix86_preferred_stack_boundary;
777 /* Values 1-5: see jump.c */
778 int ix86_branch_cost;
779 const char *ix86_branch_cost_string;
781 /* Power of two alignment for functions. */
782 const char *ix86_align_funcs_string;
784 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
785 static char internal_label_prefix[16];
786 static int internal_label_prefix_len;
788 static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
789 static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
790 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
791 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
793 static const char *get_some_local_dynamic_name PARAMS ((void));
794 static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
795 static rtx maybe_get_pool_constant PARAMS ((rtx));
796 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
797 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
799 static rtx get_thread_pointer PARAMS ((void));
800 static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
801 static rtx gen_push PARAMS ((rtx));
802 static int memory_address_length PARAMS ((rtx addr));
803 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
804 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
805 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
806 static void ix86_dump_ppro_packet PARAMS ((FILE *));
807 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
808 static struct machine_function * ix86_init_machine_status PARAMS ((void));
809 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
810 static int ix86_nsaved_regs PARAMS ((void));
811 static void ix86_emit_save_regs PARAMS ((void));
812 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
813 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
814 static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
/* Forward declarations for this file's static helpers: move/scheduler
   utilities, target hook implementations, MMX/SSE builtin expanders,
   FP-comparison costing, frame layout, attribute handlers, and the
   x86-64 argument-classification machinery defined further below.  */
815 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
816 static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
817 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
818 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
819 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
820 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
821 static int ix86_issue_rate PARAMS ((void));
822 static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
823 static void ix86_sched_init PARAMS ((FILE *, int, int));
824 static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
825 static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
826 static int ia32_use_dfa_pipeline_interface PARAMS ((void));
827 static int ia32_multipass_dfa_lookahead PARAMS ((void));
828 static void ix86_init_mmx_sse_builtins PARAMS ((void));
829 static rtx x86_this_parameter PARAMS ((tree));
830 static void x86_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
831 HOST_WIDE_INT, tree));
832 static bool x86_can_output_mi_thunk PARAMS ((tree, HOST_WIDE_INT,
833 HOST_WIDE_INT, tree));
834 bool ix86_expand_carry_flag_compare PARAMS ((enum rtx_code, rtx, rtx, rtx*));
/* NOTE(review): the line below appears to be fields of struct
   ix86_address (base/index/displacement of a decomposed address, cf.
   ix86_decompose_address just after) -- confirm against the full
   struct definition.  */
838 rtx base, index, disp;
842 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
843 static bool ix86_cannot_force_const_mem PARAMS ((rtx));
845 static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
846 static const char *ix86_strip_name_encoding PARAMS ((const char *))
849 struct builtin_description;
850 static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
852 static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
854 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
855 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
856 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
857 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
858 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
859 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
860 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
864 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
866 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
867 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
868 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
869 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
870 static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
871 static int ix86_save_reg PARAMS ((unsigned int, int));
872 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
873 static int ix86_comp_type_attributes PARAMS ((tree, tree));
874 static int ix86_fntype_regparm PARAMS ((tree));
875 const struct attribute_spec ix86_attribute_table[];
876 static bool ix86_function_ok_for_sibcall PARAMS ((tree, tree));
877 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
878 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
879 static int ix86_value_regno PARAMS ((enum machine_mode));
880 static bool ix86_ms_bitfield_layout_p PARAMS ((tree));
881 static tree ix86_handle_struct_attribute PARAMS ((tree *, tree, tree, int, bool *));
882 static int extended_reg_mentioned_1 PARAMS ((rtx *, void *));
883 static bool ix86_rtx_costs PARAMS ((rtx, int, int, int *));
885 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
886 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
889 /* Register class used for passing given 64bit part of the argument.
890 These represent classes as documented by the PS ABI, with the exception
891 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
892 use SF or DFmode move instead of DImode to avoid reformatting penalties.
894 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
895 whenever possible (upper half does contain padding).
897 enum x86_64_reg_class
900 X86_64_INTEGER_CLASS,
901 X86_64_INTEGERSI_CLASS,
/* Human-readable names for the classes above, indexed in enum order;
   used for debug output.  */
910 static const char * const x86_64_reg_class_name[] =
911 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
/* An argument occupies at most 4 eightbytes (32 bytes) before it is
   forced to memory, hence at most 4 classes per argument.  */
913 #define MAX_CLASSES 4
914 static int classify_argument PARAMS ((enum machine_mode, tree,
915 enum x86_64_reg_class [MAX_CLASSES],
917 static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
919 static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
921 static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
922 enum x86_64_reg_class));
924 /* Initialize the GCC target structure. */
/* Attribute and builtin hooks.  */
925 #undef TARGET_ATTRIBUTE_TABLE
926 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
927 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
928 # undef TARGET_MERGE_DECL_ATTRIBUTES
929 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
932 #undef TARGET_COMP_TYPE_ATTRIBUTES
933 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
935 #undef TARGET_INIT_BUILTINS
936 #define TARGET_INIT_BUILTINS ix86_init_builtins
938 #undef TARGET_EXPAND_BUILTIN
939 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
/* Assembler output hooks.  */
941 #undef TARGET_ASM_FUNCTION_EPILOGUE
942 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
944 #undef TARGET_ASM_OPEN_PAREN
945 #define TARGET_ASM_OPEN_PAREN ""
946 #undef TARGET_ASM_CLOSE_PAREN
947 #define TARGET_ASM_CLOSE_PAREN ""
949 #undef TARGET_ASM_ALIGNED_HI_OP
950 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
951 #undef TARGET_ASM_ALIGNED_SI_OP
952 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
954 #undef TARGET_ASM_ALIGNED_DI_OP
955 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
/* x86 assemblers do not distinguish aligned from unaligned data
   directives, so reuse the aligned ones.  */
958 #undef TARGET_ASM_UNALIGNED_HI_OP
959 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
960 #undef TARGET_ASM_UNALIGNED_SI_OP
961 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
962 #undef TARGET_ASM_UNALIGNED_DI_OP
963 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
/* Instruction scheduler hooks.  */
965 #undef TARGET_SCHED_ADJUST_COST
966 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
967 #undef TARGET_SCHED_ISSUE_RATE
968 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
969 #undef TARGET_SCHED_VARIABLE_ISSUE
970 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
971 #undef TARGET_SCHED_INIT
972 #define TARGET_SCHED_INIT ix86_sched_init
973 #undef TARGET_SCHED_REORDER
974 #define TARGET_SCHED_REORDER ix86_sched_reorder
975 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
976 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
977 ia32_use_dfa_pipeline_interface
978 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
979 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
980 ia32_multipass_dfa_lookahead
/* Miscellaneous code generation hooks.  */
982 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
983 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
986 #undef TARGET_HAVE_TLS
987 #define TARGET_HAVE_TLS true
989 #undef TARGET_CANNOT_FORCE_CONST_MEM
990 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
992 #undef TARGET_MS_BITFIELD_LAYOUT_P
993 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
995 #undef TARGET_ASM_OUTPUT_MI_THUNK
996 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
997 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
998 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1000 #undef TARGET_RTX_COSTS
1001 #define TARGET_RTX_COSTS ix86_rtx_costs
/* The single global target-hook vector, built from the macros above.  */
1003 struct gcc_target targetm = TARGET_INITIALIZER;
1005 /* Sometimes certain combinations of command options do not make
1006 sense on a particular target machine. You can define a macro
1007 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1008 defined, is executed once just after all the command options have
1011 Don't use this macro to turn on various extra optimizations for
1012 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1018 /* Comes from final.c -- no real reason to change it. */
1019 #define MAX_CODE_ALIGN 16
/* Per-processor tuning record: cost table plus target flags to
   force on/off and the default code-alignment parameters.  */
1023 const struct processor_costs *cost; /* Processor costs */
1024 const int target_enable; /* Target flags to enable. */
1025 const int target_disable; /* Target flags to disable. */
1026 const int align_loop; /* Default alignments. */
1027 const int align_loop_max_skip;
1028 const int align_jump;
1029 const int align_jump_max_skip;
1030 const int align_func;
/* Rows indexed by enum processor_type; columns match the field
   declarations above.  */
1032 const processor_target_table[PROCESSOR_max] =
1034 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1035 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1036 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1037 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1038 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1039 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1040 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1041 {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
1044 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
/* Alias table mapping each -march=/-mcpu= name to a processor type
   and the PTA_* ISA-extension flags that CPU implies.  */
1047 const char *const name; /* processor name or nickname. */
1048 const enum processor_type processor;
1049 const enum pta_flags
1054 PTA_PREFETCH_SSE = 8,
1060 const processor_alias_table[] =
1062 {"i386", PROCESSOR_I386, 0},
1063 {"i486", PROCESSOR_I486, 0},
1064 {"i586", PROCESSOR_PENTIUM, 0},
1065 {"pentium", PROCESSOR_PENTIUM, 0},
1066 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1067 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1068 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1069 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1070 {"i686", PROCESSOR_PENTIUMPRO, 0},
1071 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1072 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1073 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1074 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
1075 PTA_MMX | PTA_PREFETCH_SSE},
1076 {"k6", PROCESSOR_K6, PTA_MMX},
1077 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1078 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1079 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1081 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1082 | PTA_3DNOW | PTA_3DNOW_A},
1083 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1084 | PTA_3DNOW_A | PTA_SSE},
1085 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1086 | PTA_3DNOW_A | PTA_SSE},
1087 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1088 | PTA_3DNOW_A | PTA_SSE},
1089 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1090 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1093 int const pta_size = ARRAY_SIZE (processor_alias_table);
1095 /* By default our XFmode is the 80-bit extended format. If we have
1096 to use TFmode instead, it's also the 80-bit format, but with padding. */
1097 real_format_for_mode[XFmode - QFmode] = &ieee_extended_intel_96_format;
1098 real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
1100 /* Set the default values for switches whose default depends on TARGET_64BIT
1101 in case they weren't overwritten by command line options. */
1104 if (flag_omit_frame_pointer == 2)
1105 flag_omit_frame_pointer = 1;
1106 if (flag_asynchronous_unwind_tables == 2)
1107 flag_asynchronous_unwind_tables = 1;
1108 if (flag_pcc_struct_return == 2)
1109 flag_pcc_struct_return = 0;
1113 if (flag_omit_frame_pointer == 2)
1114 flag_omit_frame_pointer = 0;
1115 if (flag_asynchronous_unwind_tables == 2)
1116 flag_asynchronous_unwind_tables = 0;
1117 if (flag_pcc_struct_return == 2)
1118 flag_pcc_struct_return = 1;
1121 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1122 SUBTARGET_OVERRIDE_OPTIONS;
/* -mcpu defaults to -march if given, else to the configured default;
   -march defaults by target bitness.  */
1125 if (!ix86_cpu_string && ix86_arch_string)
1126 ix86_cpu_string = ix86_arch_string;
1127 if (!ix86_cpu_string)
1128 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
1129 if (!ix86_arch_string)
1130 ix86_arch_string = TARGET_64BIT ? "k8" : "i386";
/* Parse and validate -mcmodel=.  */
1132 if (ix86_cmodel_string != 0)
1134 if (!strcmp (ix86_cmodel_string, "small"))
1135 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1137 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1138 else if (!strcmp (ix86_cmodel_string, "32"))
1139 ix86_cmodel = CM_32;
1140 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1141 ix86_cmodel = CM_KERNEL;
1142 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1143 ix86_cmodel = CM_MEDIUM;
1144 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1145 ix86_cmodel = CM_LARGE;
1147 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1151 ix86_cmodel = CM_32;
1153 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
/* Parse -masm= (assembler dialect).  */
1155 if (ix86_asm_string != 0)
1157 if (!strcmp (ix86_asm_string, "intel"))
1158 ix86_asm_dialect = ASM_INTEL;
1159 else if (!strcmp (ix86_asm_string, "att"))
1160 ix86_asm_dialect = ASM_ATT;
1162 error ("bad value (%s) for -masm= switch", ix86_asm_string);
/* Sanity-check the code model against the target bitness.  */
1164 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1165 error ("code model `%s' not supported in the %s bit mode",
1166 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1167 if (ix86_cmodel == CM_LARGE)
1168 sorry ("code model `large' not supported yet");
1169 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1170 sorry ("%i-bit mode not compiled in",
1171 (target_flags & MASK_64BIT) ? 64 : 32);
/* Resolve -march= via the alias table and enable the ISA extensions
   it implies, unless the user set them explicitly.  */
1173 for (i = 0; i < pta_size; i++)
1174 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1176 ix86_arch = processor_alias_table[i].processor;
1177 /* Default cpu tuning to the architecture. */
1178 ix86_cpu = ix86_arch;
1179 if (processor_alias_table[i].flags & PTA_MMX
1180 && !(target_flags_explicit & MASK_MMX))
1181 target_flags |= MASK_MMX;
1182 if (processor_alias_table[i].flags & PTA_3DNOW
1183 && !(target_flags_explicit & MASK_3DNOW))
1184 target_flags |= MASK_3DNOW;
1185 if (processor_alias_table[i].flags & PTA_3DNOW_A
1186 && !(target_flags_explicit & MASK_3DNOW_A))
1187 target_flags |= MASK_3DNOW_A;
1188 if (processor_alias_table[i].flags & PTA_SSE
1189 && !(target_flags_explicit & MASK_SSE))
1190 target_flags |= MASK_SSE;
1191 if (processor_alias_table[i].flags & PTA_SSE2
1192 && !(target_flags_explicit & MASK_SSE2))
1193 target_flags |= MASK_SSE2;
1194 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1195 x86_prefetch_sse = true;
1196 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1197 error ("CPU you selected does not support x86-64 instruction set")
1202 error ("bad value (%s) for -march= switch", ix86_arch_string);
/* Resolve -mcpu= (tuning only) via the same alias table.  */
1204 for (i = 0; i < pta_size; i++)
1205 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
1207 ix86_cpu = processor_alias_table[i].processor;
1208 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1209 error ("CPU you selected does not support x86-64 instruction set");
1212 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1213 x86_prefetch_sse = true;
1215 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
/* Choose the cost table: size_cost when optimizing for size,
   otherwise the tuned processor's table.  */
1218 ix86_cost = &size_cost;
1220 ix86_cost = processor_target_table[ix86_cpu].cost;
1221 target_flags |= processor_target_table[ix86_cpu].target_enable;
1222 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
1224 /* Arrange to set up i386_stack_locals for all functions. */
1225 init_machine_status = ix86_init_machine_status;
1227 /* Validate -mregparm= value. */
1228 if (ix86_regparm_string)
1230 i = atoi (ix86_regparm_string);
1231 if (i < 0 || i > REGPARM_MAX)
1232 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1238 ix86_regparm = REGPARM_MAX;
1240 /* If the user has provided any of the -malign-* options,
1241 warn and use that value only if -falign-* is not set.
1242 Remove this code in GCC 3.2 or later. */
1243 if (ix86_align_loops_string)
1245 warning ("-malign-loops is obsolete, use -falign-loops");
1246 if (align_loops == 0)
1248 i = atoi (ix86_align_loops_string);
1249 if (i < 0 || i > MAX_CODE_ALIGN)
1250 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1252 align_loops = 1 << i;
1256 if (ix86_align_jumps_string)
1258 warning ("-malign-jumps is obsolete, use -falign-jumps");
1259 if (align_jumps == 0)
1261 i = atoi (ix86_align_jumps_string);
1262 if (i < 0 || i > MAX_CODE_ALIGN)
/* NOTE(review): the diagnostic below says -malign-loops but this
   branch validates -malign-jumps -- copy-paste error in the
   message text.  */
1263 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1265 align_jumps = 1 << i;
1269 if (ix86_align_funcs_string)
1271 warning ("-malign-functions is obsolete, use -falign-functions");
1272 if (align_functions == 0)
1274 i = atoi (ix86_align_funcs_string);
1275 if (i < 0 || i > MAX_CODE_ALIGN)
/* NOTE(review): same copy-paste error -- message should name
   -malign-functions, not -malign-loops.  */
1276 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1278 align_functions = 1 << i;
1282 /* Default align_* from the processor table. */
1283 if (align_loops == 0)
1285 align_loops = processor_target_table[ix86_cpu].align_loop;
1286 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
1288 if (align_jumps == 0)
1290 align_jumps = processor_target_table[ix86_cpu].align_jump;
1291 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
1293 if (align_functions == 0)
1295 align_functions = processor_target_table[ix86_cpu].align_func;
1298 /* Validate -mpreferred-stack-boundary= value, or provide default.
1299 The default of 128 bits is for Pentium III's SSE __m128, but we
1300 don't want additional code to keep the stack aligned when
1301 optimizing for code size. */
1302 ix86_preferred_stack_boundary = (optimize_size
1303 ? TARGET_64BIT ? 128 : 32
1305 if (ix86_preferred_stack_boundary_string)
1307 i = atoi (ix86_preferred_stack_boundary_string);
1308 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1309 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1310 TARGET_64BIT ? 4 : 2);
1312 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1315 /* Validate -mbranch-cost= value, or provide default. */
1316 ix86_branch_cost = processor_target_table[ix86_cpu].cost->branch_cost;
1317 if (ix86_branch_cost_string)
1319 i = atoi (ix86_branch_cost_string);
1321 error ("-mbranch-cost=%d is not between 0 and 5", i);
1323 ix86_branch_cost = i;
/* Parse -mtls-dialect=.  */
1326 if (ix86_tls_dialect_string)
1328 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1329 ix86_tls_dialect = TLS_DIALECT_GNU;
1330 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1331 ix86_tls_dialect = TLS_DIALECT_SUN;
1333 error ("bad value (%s) for -mtls-dialect= switch",
1334 ix86_tls_dialect_string);
1337 /* Keep nonleaf frame pointers. */
1338 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1339 flag_omit_frame_pointer = 1;
1341 /* If we're doing fast math, we don't care about comparison order
1342 wrt NaNs. This lets us use a shorter comparison sequence. */
1343 if (flag_unsafe_math_optimizations)
1344 target_flags &= ~MASK_IEEE_FP;
1346 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1347 since the insns won't need emulation. */
1348 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1349 target_flags &= ~MASK_NO_FANCY_MATH_387;
/* 64-bit-only constraints and defaults.  */
1353 if (TARGET_ALIGN_DOUBLE)
1354 error ("-malign-double makes no sense in the 64bit mode");
1356 error ("-mrtd calling convention not supported in the 64bit mode");
1357 /* Enable by default the SSE and MMX builtins. */
1358 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1359 ix86_fpmath = FPMATH_SSE;
1362 ix86_fpmath = FPMATH_387;
/* Parse -mfpmath=, falling back to 387 when SSE is unavailable.  */
1364 if (ix86_fpmath_string != 0)
1366 if (! strcmp (ix86_fpmath_string, "387"))
1367 ix86_fpmath = FPMATH_387;
1368 else if (! strcmp (ix86_fpmath_string, "sse"))
1372 warning ("SSE instruction set disabled, using 387 arithmetics");
1373 ix86_fpmath = FPMATH_387;
1376 ix86_fpmath = FPMATH_SSE;
1378 else if (! strcmp (ix86_fpmath_string, "387,sse")
1379 || ! strcmp (ix86_fpmath_string, "sse,387"))
1383 warning ("SSE instruction set disabled, using 387 arithmetics");
1384 ix86_fpmath = FPMATH_387;
1386 else if (!TARGET_80387)
1388 warning ("387 instruction set disabled, using SSE arithmetics");
1389 ix86_fpmath = FPMATH_SSE;
1392 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1395 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1398 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1402 target_flags |= MASK_MMX;
1403 x86_prefetch_sse = true;
1406 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1409 target_flags |= MASK_MMX;
1410 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1411 extensions it adds. */
1412 if (x86_3dnow_a & (1 << ix86_arch))
1413 target_flags |= MASK_3DNOW_A;
1415 if ((x86_accumulate_outgoing_args & CPUMASK)
1416 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1418 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1420 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1423 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1424 p = strchr (internal_label_prefix, 'X');
1425 internal_label_prefix_len = p - internal_label_prefix;
/* OPTIMIZATION_OPTIONS hook: set optimization-level-dependent defaults.
   LEVEL is the -O level; SIZE is nonzero for -Os (unused here).  */
1431 optimization_options (level, size)
1433 int size ATTRIBUTE_UNUSED;
1435 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1436 make the problem with not enough registers even worse. */
1437 #ifdef INSN_SCHEDULING
1439 flag_schedule_insns = 0;
1442 /* The default values of these switches depend on the TARGET_64BIT
1443 that is not known at this moment. Mark these values with 2 and
1444 let the user override these. In case there is no command line option
1445 specifying them, we will set the defaults in override_options. */
1447 flag_omit_frame_pointer = 2;
1448 flag_pcc_struct_return = 2;
1449 flag_asynchronous_unwind_tables = 2;
1452 /* Table of valid machine attributes. */
1453 const struct attribute_spec ix86_attribute_table[] =
1455 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1456 /* Stdcall attribute says callee is responsible for popping arguments
1457 if they are not variable. */
1458 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1459 /* Fastcall attribute says callee is responsible for popping arguments
1460 if they are not variable. */
1461 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1462 /* Cdecl attribute says the callee is a normal C declaration */
1463 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1464 /* Regparm attribute specifies how many integer arguments are to be
1465 passed in registers. */
1466 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1467 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1468 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1469 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1470 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
/* ms_struct/gcc_struct select MS vs. GCC structure layout rules.  */
1472 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1473 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
/* Sentinel terminating the table.  */
1474 { NULL, 0, 0, false, false, false, NULL }
1477 /* If PIC, we cannot make sibling calls to global functions
1478 because the PLT requires %ebx live.
1479 If we are returning floats on the register stack, we cannot make
1480 sibling calls to functions that return floats. (The stack adjust
1481 instruction will wind up after the sibcall jump, and not be executed.) */
/* TARGET_FUNCTION_OK_FOR_SIBCALL hook.  DECL is the callee's decl
   (NULL for an indirect call); EXP is the CALL_EXPR.  */
1484 ix86_function_ok_for_sibcall (decl, exp)
1488 /* If we are generating position-independent code, we cannot sibcall
1489 optimize any indirect call, or a direct call to a global function,
1490 as the PLT requires %ebx be live. */
1491 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1494 /* If we are returning floats on the 80387 register stack, we cannot
1495 make a sibcall from a function that doesn't return a float to a
1496 function that does; the necessary stack adjustment will not be
1498 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1499 && ! STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1502 /* If this call is indirect, we'll need to be able to use a call-clobbered
1503 register for the address of the target function. Make sure that all
1504 such registers are not used for passing parameters. */
1505 if (!decl && !TARGET_64BIT)
1507 int regparm = ix86_regparm;
1510 /* We're looking at the CALL_EXPR, we need the type of the function. */
1511 type = TREE_OPERAND (exp, 0); /* pointer expression */
1512 type = TREE_TYPE (type); /* pointer type */
1513 type = TREE_TYPE (type); /* function type */
/* A regparm attribute on the callee type overrides the global
   -mregparm setting.  */
1515 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1517 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1521 /* ??? Need to count the actual number of registers to be used,
1522 not the possible number of registers. Fix later. */
1527 /* Otherwise okay. That also includes certain types of indirect calls. */
1531 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1532 arguments as in struct attribute_spec.handler.  On a non-function
1533 node the attribute is dropped with a warning; mutually exclusive
1534 calling-convention attributes are diagnosed as errors. */
1534 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1537 tree args ATTRIBUTE_UNUSED;
1538 int flags ATTRIBUTE_UNUSED;
/* The attribute only makes sense on (pointers to) functions.  */
1541 if (TREE_CODE (*node) != FUNCTION_TYPE
1542 && TREE_CODE (*node) != METHOD_TYPE
1543 && TREE_CODE (*node) != FIELD_DECL
1544 && TREE_CODE (*node) != TYPE_DECL)
1546 warning ("`%s' attribute only applies to functions",
1547 IDENTIFIER_POINTER (name));
1548 *no_add_attrs = true;
/* Reject incompatible calling-convention combinations.  */
1552 if (is_attribute_p ("fastcall", name))
1554 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1556 error ("fastcall and stdcall attributes are not compatible");
1558 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1560 error ("fastcall and regparm attributes are not compatible");
1563 else if (is_attribute_p ("stdcall", name))
1565 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1567 error ("fastcall and stdcall attributes are not compatible");
/* These calling conventions are meaningless in 64-bit mode; ignore.  */
1574 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1575 *no_add_attrs = true;
1581 /* Handle a "regparm" attribute;
1582 arguments as in struct attribute_spec.handler.  Validates that the
1583 attribute is on a function, that its argument is an integer constant
1584 no larger than REGPARM_MAX, and that it does not combine with
1585 fastcall. */
1584 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1588 int flags ATTRIBUTE_UNUSED;
1591 if (TREE_CODE (*node) != FUNCTION_TYPE
1592 && TREE_CODE (*node) != METHOD_TYPE
1593 && TREE_CODE (*node) != FIELD_DECL
1594 && TREE_CODE (*node) != TYPE_DECL)
1596 warning ("`%s' attribute only applies to functions",
1597 IDENTIFIER_POINTER (name));
1598 *no_add_attrs = true;
/* The single attribute argument must be an integer constant in
   [0, REGPARM_MAX].  */
1604 cst = TREE_VALUE (args);
1605 if (TREE_CODE (cst) != INTEGER_CST)
1607 warning ("`%s' attribute requires an integer constant argument",
1608 IDENTIFIER_POINTER (name));
1609 *no_add_attrs = true;
1611 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1613 warning ("argument to `%s' attribute larger than %d",
1614 IDENTIFIER_POINTER (name), REGPARM_MAX);
1615 *no_add_attrs = true;
1618 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1620 error ("fastcall and regparm attributes are not compatible");
1627 /* Return 0 if the attributes for two types are incompatible, 1 if they
1628 are compatible, and 2 if they are nearly compatible (which causes a
1629 warning to be generated). */
1632 ix86_comp_type_attributes (type1, type2)
1636 /* Check for mismatch of non-default calling convention. */
1637 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
/* Only function types can carry calling-convention attributes.  */
1639 if (TREE_CODE (type1) != FUNCTION_TYPE)
1642 /* Check for mismatched fastcall types */
1643 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1644 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1647 /* Check for mismatched return types (cdecl vs stdcall). */
1648 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1649 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1654 /* Return the regparm value for a function with the indicated TYPE:
1655 the value of the type's "regparm" attribute if present, otherwise
1656 the global -mregparm setting. */
1657 ix86_fntype_regparm (type)
1662 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1664 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1666 return ix86_regparm;
1669 /* Value is the number of bytes of arguments automatically
1670 popped when returning from a subroutine call.
1671 FUNDECL is the declaration node of the function (as a tree),
1672 FUNTYPE is the data type of the function (as a tree),
1673 or for a library call it is an identifier node for the subroutine name.
1674 SIZE is the number of bytes of arguments passed on the stack.
1676 On the 80386, the RTD insn may be used to pop them if the number
1677 of args is fixed, but if the number is variable then the caller
1678 must pop them all. RTD can't be used for library calls now
1679 because the library is compiled with the Unix compiler.
1680 Use of RTD is a selectable option, since it is incompatible with
1681 standard Unix calling sequences. If the option is not selected,
1682 the caller must always pop the args.
1684 The attribute stdcall is equivalent to RTD on a per module basis. */
1687 ix86_return_pops_args (fundecl, funtype, size)
/* -mrtd applies to real functions only, never to library calls
   (identified by an IDENTIFIER_NODE fundecl).  */
1692 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1694 /* Cdecl functions override -mrtd, and never pop the stack. */
1695 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1697 /* Stdcall and fastcall functions will pop the stack if not variable args. */
1698 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1699 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
/* Callee pops only when the argument list is fixed, i.e. empty or
   terminated by void_type_node.  */
1703 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1704 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1705 == void_type_node)))
1709 /* Lose any fake structure return argument if it is passed on the stack. */
1710 if (aggregate_value_p (TREE_TYPE (funtype))
1713 int nregs = ix86_fntype_regparm (funtype);
/* The hidden struct-return pointer occupies one word on the stack.  */
1716 return GET_MODE_SIZE (Pmode);
1722 /* Argument support functions. */
1724 /* Return true when register may be used to pass function parameters. */
1726 ix86_function_arg_regno_p (regno)
/* 32-bit case: the low REGPARM_MAX integer registers, plus SSE
   registers when SSE argument passing is enabled.  */
1731 return (regno < REGPARM_MAX
1732 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
/* 64-bit case below.  */
1733 if (SSE_REGNO_P (regno) && TARGET_SSE)
1735 /* RAX is used as hidden argument to va_arg functions. */
1738 for (i = 0; i < REGPARM_MAX; i++)
1739 if (regno == x86_64_int_parameter_registers[i])
1744 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1745 for a call to a function whose data type is FNTYPE.
1746 For a library call, FNTYPE is 0. */
1749 init_cumulative_args (cum, fntype, libname)
1750 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1751 tree fntype; /* tree ptr for function decl */
1752 rtx libname; /* SYMBOL_REF of library name or 0 */
1754 static CUMULATIVE_ARGS zero_cum;
1755 tree param, next_param;
/* Optional debug trace of the incoming function type.  */
1757 if (TARGET_DEBUG_ARG)
1759 fprintf (stderr, "\ninit_cumulative_args (");
1761 fprintf (stderr, "fntype code = %s, ret code = %s",
1762 tree_code_name[(int) TREE_CODE (fntype)],
1763 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1765 fprintf (stderr, "no fntype");
1768 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1773 /* Set up the number of registers to use for passing arguments. */
1774 cum->nregs = ix86_regparm;
1775 cum->sse_nregs = SSE_REGPARM_MAX;
/* In 32-bit mode a regparm attribute on the function type overrides
   the global -mregparm count.  */
1776 if (fntype && !TARGET_64BIT)
1778 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1781 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1783 cum->maybe_vaarg = false;
1785 /* Use ecx and edx registers if function has fastcall attribute */
1786 if (fntype && !TARGET_64BIT)
1788 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1796 /* Determine if this function has variable arguments. This is
1797 indicated by the last argument being 'void_type_mode' if there
1798 are no variable arguments. If there are variable arguments, then
1799 we won't pass anything in registers */
1803 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1804 param != 0; param = next_param)
1806 next_param = TREE_CHAIN (param);
1807 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1814 cum->maybe_vaarg = true;
/* Unprototyped calls (no fntype and no libname, or an fntype with no
   argument list) might be variadic.  */
1818 if ((!fntype && !libname)
1819 || (fntype && !TYPE_ARG_TYPES (fntype)))
1820 cum->maybe_vaarg = 1;
1822 if (TARGET_DEBUG_ARG)
1823 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1828 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
1829 of this code is to classify each 8bytes of incoming argument by the register
1830 class and assign registers accordingly. */
1832 /* Return the union class of CLASS1 and CLASS2, applying the merge
1833 rules of the x86-64 PS ABI classification algorithm in order. */
1835 static enum x86_64_reg_class
1836 merge_classes (class1, class2)
1837 enum x86_64_reg_class class1, class2;
1839 /* Rule #1: If both classes are equal, this is the resulting class. */
1840 if (class1 == class2)
1843 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1844 the other class. */
1845 if (class1 == X86_64_NO_CLASS)
1847 if (class2 == X86_64_NO_CLASS)
1850 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1851 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1852 return X86_64_MEMORY_CLASS;
1854 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
/* INTEGERSI + SSESF both occupy only the low 4 bytes, so the cheaper
   SImode form can be preserved.  */
1855 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1856 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1857 return X86_64_INTEGERSI_CLASS;
1858 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1859 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1860 return X86_64_INTEGER_CLASS;
1862 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1863 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1864 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1865 return X86_64_MEMORY_CLASS;
1867 /* Rule #6: Otherwise class SSE is used. */
1868 return X86_64_SSE_CLASS;
/* NOTE(review): the residual line numbers embedded in this chunk skip
   values, so interior statements (braces, switch headers, returns) of
   this function appear to have been lost in extraction.  Code is kept
   byte-identical; recover the missing lines from the original file
   before compiling.  */
1871 /* Classify the argument of type TYPE and mode MODE.
1872 CLASSES will be filled by the register class used to pass each word
1873 of the operand. The number of words is returned. In case the parameter
1874 should be passed in memory, 0 is returned. As a special case for zero
1875 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1877 BIT_OFFSET is used internally for handling records and specifies offset
1878 of the offset in bits modulo 256 to avoid overflow cases.
1880 See the x86-64 PS ABI for details.
1884 classify_argument (mode, type, classes, bit_offset)
1885 enum machine_mode mode;
1887 enum x86_64_reg_class classes[MAX_CLASSES];
1891 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1892 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1894 /* Variable sized entities are always passed/returned in memory. */
1898 if (type && AGGREGATE_TYPE_P (type))
1902 enum x86_64_reg_class subclasses[MAX_CLASSES];
1904 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1908 for (i = 0; i < words; i++)
1909 classes[i] = X86_64_NO_CLASS;
1911 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1912 signalize memory class, so handle it as special case. */
1915 classes[0] = X86_64_NO_CLASS;
1919 /* Classify each field of record and merge classes. */
1920 if (TREE_CODE (type) == RECORD_TYPE)
1922 /* For classes first merge in the field of the subclasses. */
1923 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1925 tree bases = TYPE_BINFO_BASETYPES (type);
1926 int n_bases = TREE_VEC_LENGTH (bases);
1929 for (i = 0; i < n_bases; ++i)
1931 tree binfo = TREE_VEC_ELT (bases, i);
1933 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1934 tree type = BINFO_TYPE (binfo);
1936 num = classify_argument (TYPE_MODE (type),
1938 (offset + bit_offset) % 256);
1941 for (i = 0; i < num; i++)
1943 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1945 merge_classes (subclasses[i], classes[i + pos]);
1949 /* And now merge the fields of structure. */
1950 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1952 if (TREE_CODE (field) == FIELD_DECL)
1956 /* Bitfields are always classified as integer. Handle them
1957 early, since later code would consider them to be
1958 misaligned integers. */
1959 if (DECL_BIT_FIELD (field))
1961 for (i = int_bit_position (field) / 8 / 8;
1962 i < (int_bit_position (field)
1963 + tree_low_cst (DECL_SIZE (field), 0)
1966 merge_classes (X86_64_INTEGER_CLASS,
1971 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1972 TREE_TYPE (field), subclasses,
1973 (int_bit_position (field)
1974 + bit_offset) % 256);
1977 for (i = 0; i < num; i++)
1980 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
1982 merge_classes (subclasses[i], classes[i + pos]);
1988 /* Arrays are handled as small records. */
1989 else if (TREE_CODE (type) == ARRAY_TYPE)
1992 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1993 TREE_TYPE (type), subclasses, bit_offset);
1997 /* The partial classes are now full classes. */
1998 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1999 subclasses[0] = X86_64_SSE_CLASS;
2000 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2001 subclasses[0] = X86_64_INTEGER_CLASS;
2003 for (i = 0; i < words; i++)
2004 classes[i] = subclasses[i % num];
2006 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2007 else if (TREE_CODE (type) == UNION_TYPE
2008 || TREE_CODE (type) == QUAL_UNION_TYPE)
2010 /* For classes first merge in the field of the subclasses. */
2011 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2013 tree bases = TYPE_BINFO_BASETYPES (type);
2014 int n_bases = TREE_VEC_LENGTH (bases);
2017 for (i = 0; i < n_bases; ++i)
2019 tree binfo = TREE_VEC_ELT (bases, i);
2021 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2022 tree type = BINFO_TYPE (binfo);
2024 num = classify_argument (TYPE_MODE (type),
2026 (offset + (bit_offset % 64)) % 256);
2029 for (i = 0; i < num; i++)
2031 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2033 merge_classes (subclasses[i], classes[i + pos]);
2037 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2039 if (TREE_CODE (field) == FIELD_DECL)
2042 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2043 TREE_TYPE (field), subclasses,
2047 for (i = 0; i < num; i++)
2048 classes[i] = merge_classes (subclasses[i], classes[i]);
2055 /* Final merger cleanup. */
2056 for (i = 0; i < words; i++)
2058 /* If one class is MEMORY, everything should be passed in
2060 if (classes[i] == X86_64_MEMORY_CLASS)
2063 /* The X86_64_SSEUP_CLASS should be always preceded by
2064 X86_64_SSE_CLASS. */
2065 if (classes[i] == X86_64_SSEUP_CLASS
2066 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2067 classes[i] = X86_64_SSE_CLASS;
2069 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2070 if (classes[i] == X86_64_X87UP_CLASS
2071 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2072 classes[i] = X86_64_SSE_CLASS;
2077 /* Compute alignment needed. We align all types to natural boundaries with
2078 exception of XFmode that is aligned to 64bits. */
2079 if (mode != VOIDmode && mode != BLKmode)
2081 int mode_alignment = GET_MODE_BITSIZE (mode);
2084 mode_alignment = 128;
2085 else if (mode == XCmode)
2086 mode_alignment = 256;
2087 /* Misaligned fields are always returned in memory. */
2088 if (bit_offset % mode_alignment)
2092 /* Classification of atomic types. */
/* NOTE(review): the lines below are the arms of a switch over MODE whose
   case labels were lost in extraction; each assignment fills classes[]
   for one scalar/vector mode.  */
2102 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2103 classes[0] = X86_64_INTEGERSI_CLASS;
2105 classes[0] = X86_64_INTEGER_CLASS;
2109 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2112 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2113 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2116 if (!(bit_offset % 64))
2117 classes[0] = X86_64_SSESF_CLASS;
2119 classes[0] = X86_64_SSE_CLASS;
2122 classes[0] = X86_64_SSEDF_CLASS;
2125 classes[0] = X86_64_X87_CLASS;
2126 classes[1] = X86_64_X87UP_CLASS;
2129 classes[0] = X86_64_X87_CLASS;
2130 classes[1] = X86_64_X87UP_CLASS;
2131 classes[2] = X86_64_X87_CLASS;
2132 classes[3] = X86_64_X87UP_CLASS;
2135 classes[0] = X86_64_SSEDF_CLASS;
2136 classes[1] = X86_64_SSEDF_CLASS;
2139 classes[0] = X86_64_SSE_CLASS;
2147 classes[0] = X86_64_SSE_CLASS;
2148 classes[1] = X86_64_SSEUP_CLASS;
/* NOTE(review): interior lines (switch header, counter increments, the
   memory-class early return) were lost in extraction; code kept
   byte-identical.  The surviving case labels show the grouping: INTEGER
   classes count toward *int_nregs, SSE classes toward *sse_nregs.  */
2163 /* Examine the argument and return set number of register required in each
2164 class. Return 0 iff parameter should be passed in memory. */
2166 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
2167 enum machine_mode mode;
2169 int *int_nregs, *sse_nregs;
2172 enum x86_64_reg_class class[MAX_CLASSES];
2173 int n = classify_argument (mode, type, class, 0);
2179 for (n--; n >= 0; n--)
2182 case X86_64_INTEGER_CLASS:
2183 case X86_64_INTEGERSI_CLASS:
2186 case X86_64_SSE_CLASS:
2187 case X86_64_SSESF_CLASS:
2188 case X86_64_SSEDF_CLASS:
2191 case X86_64_NO_CLASS:
2192 case X86_64_SSEUP_CLASS:
2194 case X86_64_X87_CLASS:
2195 case X86_64_X87UP_CLASS:
2199 case X86_64_MEMORY_CLASS:
/* NOTE(review): extraction dropped interior lines (switch headers, some
   case bodies, closing braces); code kept byte-identical.  The function
   builds either a single REG for simple classifications or a PARALLEL of
   EXPR_LISTs mapping each 8-byte chunk to a register.  */
2204 /* Construct container for the argument used by GCC interface. See
2205 FUNCTION_ARG for the detailed description. */
2207 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
2208 enum machine_mode mode;
2211 int nintregs, nsseregs;
2215 enum machine_mode tmpmode;
2217 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2218 enum x86_64_reg_class class[MAX_CLASSES];
2222 int needed_sseregs, needed_intregs;
2223 rtx exp[MAX_CLASSES];
2226 n = classify_argument (mode, type, class, 0);
2227 if (TARGET_DEBUG_ARG)
2230 fprintf (stderr, "Memory class\n");
2233 fprintf (stderr, "Classes:");
2234 for (i = 0; i < n; i++)
2236 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2238 fprintf (stderr, "\n");
/* Fail (return NULL) when the argument does not fit in the remaining
   registers.  */
2243 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2245 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2248 /* First construct simple cases. Avoid SCmode, since we want to use
2249 single register to pass this type. */
2250 if (n == 1 && mode != SCmode)
2253 case X86_64_INTEGER_CLASS:
2254 case X86_64_INTEGERSI_CLASS:
2255 return gen_rtx_REG (mode, intreg[0]);
2256 case X86_64_SSE_CLASS:
2257 case X86_64_SSESF_CLASS:
2258 case X86_64_SSEDF_CLASS:
2259 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2260 case X86_64_X87_CLASS:
2261 return gen_rtx_REG (mode, FIRST_STACK_REG);
2262 case X86_64_NO_CLASS:
2263 /* Zero sized array, struct or class. */
2268 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
2269 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2271 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2272 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
2273 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2274 && class[1] == X86_64_INTEGER_CLASS
2275 && (mode == CDImode || mode == TImode)
2276 && intreg[0] + 1 == intreg[1])
2277 return gen_rtx_REG (mode, intreg[0]);
2279 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2280 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2281 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
2283 /* Otherwise figure out the entries of the PARALLEL. */
2284 for (i = 0; i < n; i++)
2288 case X86_64_NO_CLASS:
2290 case X86_64_INTEGER_CLASS:
2291 case X86_64_INTEGERSI_CLASS:
2292 /* Merge TImodes on aligned occasions here too. */
2293 if (i * 8 + 8 > bytes)
2294 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2295 else if (class[i] == X86_64_INTEGERSI_CLASS)
2299 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2300 if (tmpmode == BLKmode)
2302 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2303 gen_rtx_REG (tmpmode, *intreg),
2307 case X86_64_SSESF_CLASS:
2308 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2309 gen_rtx_REG (SFmode,
2310 SSE_REGNO (sse_regno)),
2314 case X86_64_SSEDF_CLASS:
2315 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2316 gen_rtx_REG (DFmode,
2317 SSE_REGNO (sse_regno)),
2321 case X86_64_SSE_CLASS:
2322 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2326 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2327 gen_rtx_REG (tmpmode,
2328 SSE_REGNO (sse_regno)),
2330 if (tmpmode == TImode)
2338 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2339 for (i = 0; i < nexps; i++)
2340 XVECEXP (ret, 0, i) = exp [i];
/* NOTE(review): interior lines (fprintf header, TARGET_64BIT branch
   structure, closing braces) were lost in extraction; code kept
   byte-identical.  */
2344 /* Update the data in CUM to advance over an argument
2345 of mode MODE and data type TYPE.
2346 (TYPE is null for libcalls where that information may not be available.) */
2349 function_arg_advance (cum, mode, type, named)
2350 CUMULATIVE_ARGS *cum; /* current arg information */
2351 enum machine_mode mode; /* current arg mode */
2352 tree type; /* type of the argument or 0 if lib support */
2353 int named; /* whether or not the argument was named */
2356 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2357 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2359 if (TARGET_DEBUG_ARG)
2361 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2362 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
/* x86-64 path: consume int/SSE registers per examine_argument, or fall
   back to the stack when the argument does not fit in registers.  */
2365 int int_nregs, sse_nregs;
2366 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2367 cum->words += words;
2368 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2370 cum->nregs -= int_nregs;
2371 cum->sse_nregs -= sse_nregs;
2372 cum->regno += int_nregs;
2373 cum->sse_regno += sse_nregs;
2376 cum->words += words;
/* 32-bit path follows: SSE TImode args, then integer register args.  */
2380 if (TARGET_SSE && mode == TImode)
2382 cum->sse_words += words;
2383 cum->sse_nregs -= 1;
2384 cum->sse_regno += 1;
2385 if (cum->sse_nregs <= 0)
2393 cum->words += words;
2394 cum->nregs -= words;
2395 cum->regno += words;
2397 if (cum->nregs <= 0)
/* NOTE(review): interior lines were lost in extraction (branch headers,
   fastcall handling, closing braces); code kept byte-identical.  */
2407 /* Define where to put the arguments to a function.
2408 Value is zero to push the argument on the stack,
2409 or a hard register in which to store the argument.
2411 MODE is the argument's machine mode.
2412 TYPE is the data type of the argument (as a tree).
2413 This is null for libcalls where that information may
2415 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2416 the preceding args and about the function being called.
2417 NAMED is nonzero if this argument is a named parameter
2418 (otherwise it is an extra parameter matching an ellipsis). */
2421 function_arg (cum, mode, type, named)
2422 CUMULATIVE_ARGS *cum; /* current arg information */
2423 enum machine_mode mode; /* current arg mode */
2424 tree type; /* type of the argument or 0 if lib support */
2425 int named; /* != 0 for normal args, == 0 for ... args */
2429 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2430 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2432 /* Handle a hidden AL argument containing number of registers for varargs
2433 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2435 if (mode == VOIDmode)
2438 return GEN_INT (cum->maybe_vaarg
2439 ? (cum->sse_nregs < 0
/* x86-64: delegate register assignment to construct_container.  */
2447 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2448 &x86_64_int_parameter_registers [cum->regno],
2453 /* For now, pass fp/complex values on the stack. */
2462 if (words <= cum->nregs)
2464 int regno = cum->regno;
2466 /* Fastcall allocates the first two DWORD (SImode) or
2467 smaller arguments to ECX and EDX. */
2470 if (mode == BLKmode || mode == DImode)
2473 /* ECX not EAX is the first allocated register. */
2477 ret = gen_rtx_REG (mode, regno);
2482 ret = gen_rtx_REG (mode, cum->sse_regno);
2486 if (TARGET_DEBUG_ARG)
2489 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2490 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2493 print_simple_rtl (stderr, ret);
2495 fprintf (stderr, ", stack");
2497 fprintf (stderr, " )\n");
/* NOTE(review): interior lines (the `tree type;' parameter declaration,
   return statements, braces) were lost in extraction; code kept
   byte-identical.  Visible logic: variable-sized types
   (int_size_in_bytes == -1) are passed by reference.  */
2503 /* A C expression that indicates when an argument must be passed by
2504 reference. If nonzero for an argument, a copy of that argument is
2505 made in memory and a pointer to the argument is passed instead of
2506 the argument itself. The pointer is passed in whatever way is
2507 appropriate for passing a pointer to that type. */
2510 function_arg_pass_by_reference (cum, mode, type, named)
2511 CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
2512 enum machine_mode mode ATTRIBUTE_UNUSED;
2514 int named ATTRIBUTE_UNUSED;
2519 if (type && int_size_in_bytes (type) == -1)
2521 if (TARGET_DEBUG_ARG)
2522 fprintf (stderr, "function_arg_pass_by_reference\n");
/* NOTE(review): interior lines (the `type' branch headers and final
   clamp/return) were lost in extraction; code kept byte-identical.
   Visible logic: alignment comes from TYPE_ALIGN when a type is given,
   otherwise from the mode, floored at PARM_BOUNDARY.  */
2529 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2533 ix86_function_arg_boundary (mode, type)
2534 enum machine_mode mode;
2539 return PARM_BOUNDARY;
2541 align = TYPE_ALIGN (type);
2543 align = GET_MODE_ALIGNMENT (mode);
2544 if (align < PARM_BOUNDARY)
2545 align = PARM_BOUNDARY;
/* NOTE(review): the function head (return type, parameter declaration)
   and the TARGET_64BIT branch structure were lost in extraction; code
   kept byte-identical.  The two return expressions are presumably the
   64-bit and 32-bit arms of an if — TODO confirm against original.  */
2551 /* Return true if N is a possible register number of function value. */
2553 ix86_function_value_regno_p (regno)
2558 return ((regno) == 0
2559 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2560 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2562 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2563 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2564 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
/* NOTE(review): branch structure (TARGET_64BIT test, braces) was lost in
   extraction; code kept byte-identical.  */
2567 /* Define how to find the value returned by a function.
2568 VALTYPE is the data type of the value (as a tree).
2569 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2570 otherwise, FUNC is 0. */
2572 ix86_function_value (valtype)
2577 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2578 REGPARM_MAX, SSE_REGPARM_MAX,
2579 x86_64_int_return_registers, 0);
2580 /* For zero sized structures, construct_container return NULL, but we need
2581 to keep rest of compiler happy by returning meaningful value. */
2583 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2587 return gen_rtx_REG (TYPE_MODE (valtype),
2588 ix86_value_regno (TYPE_MODE (valtype)));
/* NOTE(review): branch structure was lost in extraction; code kept
   byte-identical.  On x86-64 the decision delegates to examine_argument;
   the second expression is the 32-bit heuristic.  */
2591 /* Return false iff type is returned in memory. */
2593 ix86_return_in_memory (type)
2596 int needed_intregs, needed_sseregs;
2599 return !examine_argument (TYPE_MODE (type), type, 1,
2600 &needed_intregs, &needed_sseregs);
2604 if (TYPE_MODE (type) == BLKmode
2605 || (VECTOR_MODE_P (TYPE_MODE (type))
2606 && int_size_in_bytes (type) == 8)
2607 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2608 && TYPE_MODE (type) != TFmode
2609 && !VECTOR_MODE_P (TYPE_MODE (type))))
/* NOTE(review): the switch/if scaffolding selecting among these returns
   was lost in extraction; code kept byte-identical.  Visible returns
   cover SSE, x87 and integer result registers, with the generic case
   delegating to ix86_value_regno.  */
2615 /* Define how to find the value returned by a library function
2616 assuming the value has mode MODE. */
2618 ix86_libcall_value (mode)
2619 enum machine_mode mode;
2629 return gen_rtx_REG (mode, FIRST_SSE_REG);
2632 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2634 return gen_rtx_REG (mode, 0);
2638 return gen_rtx_REG (mode, ix86_value_regno (mode));
2641 /* Given a mode, return the register to use for a return value. */
2644 ix86_value_regno (mode)
2645 enum machine_mode mode;
2647 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2648 return FIRST_FLOAT_REG;
2649 if (mode == TImode || VECTOR_MODE_P (mode))
2650 return FIRST_SSE_REG;
/* NOTE(review): a few interior lines (return type, TARGET_64BIT test,
   the pointer-type field arguments) were lost in extraction; code kept
   byte-identical.  Builds the four-field x86-64 __va_list_tag record:
   gp_offset, fp_offset, overflow_arg_area, reg_save_area.  */
2654 /* Create the va_list data type. */
2657 ix86_build_va_list ()
2659 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2661 /* For i386 we use plain pointer to argument area. */
2663 return build_pointer_type (char_type_node);
2665 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2666 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2668 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2669 unsigned_type_node);
2670 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2671 unsigned_type_node);
2672 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2674 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2677 DECL_FIELD_CONTEXT (f_gpr) = record;
2678 DECL_FIELD_CONTEXT (f_fpr) = record;
2679 DECL_FIELD_CONTEXT (f_ovf) = record;
2680 DECL_FIELD_CONTEXT (f_sav) = record;
2682 TREE_CHAIN (record) = type_decl;
2683 TYPE_NAME (record) = type_decl;
2684 TYPE_FIELDS (record) = f_gpr;
2685 TREE_CHAIN (f_gpr) = f_fpr;
2686 TREE_CHAIN (f_fpr) = f_ovf;
2687 TREE_CHAIN (f_ovf) = f_sav;
2689 layout_type (record);
2691 /* The correct type is an array type of one element. */
2692 return build_array_type (record, build_index_type (size_zero_node));
/* NOTE(review): many interior lines (local declarations, no_rtl guard,
   braces) were lost in extraction; code kept byte-identical.  Saves the
   unconsumed integer parameter registers to the register save area, then
   emits the sse_prologue_save computed-jump sequence for SSE registers.  */
2695 /* Perform any needed actions needed for a function that is receiving a
2696 variable number of arguments.
2700 MODE and TYPE are the mode and type of the current parameter.
2702 PRETEND_SIZE is a variable that should be set to the amount of stack
2703 that must be pushed by the prolog to pretend that our caller pushed
2706 Normally, this macro will push all remaining incoming registers on the
2707 stack and set PRETEND_SIZE to the length of the registers pushed. */
2710 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2711 CUMULATIVE_ARGS *cum;
2712 enum machine_mode mode;
2714 int *pretend_size ATTRIBUTE_UNUSED;
2718 CUMULATIVE_ARGS next_cum;
2719 rtx save_area = NULL_RTX, mem;
2732 /* Indicate to allocate space on the stack for varargs save area. */
2733 ix86_save_varrargs_registers = 1;
2735 fntype = TREE_TYPE (current_function_decl);
2736 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2737 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2738 != void_type_node));
2740 /* For varargs, we do not want to skip the dummy va_dcl argument.
2741 For stdargs, we do want to skip the last named argument. */
2744 function_arg_advance (&next_cum, mode, type, 1);
2747 save_area = frame_pointer_rtx;
2749 set = get_varargs_alias_set ();
/* Spill the remaining integer parameter registers.  */
2751 for (i = next_cum.regno; i < ix86_regparm; i++)
2753 mem = gen_rtx_MEM (Pmode,
2754 plus_constant (save_area, i * UNITS_PER_WORD));
2755 set_mem_alias_set (mem, set);
2756 emit_move_insn (mem, gen_rtx_REG (Pmode,
2757 x86_64_int_parameter_registers[i]));
2760 if (next_cum.sse_nregs)
2762 /* Now emit code to save SSE registers. The AX parameter contains number
2763 of SSE parameter registers used to call this function. We use
2764 sse_prologue_save insn template that produces computed jump across
2765 SSE saves. We need some preparation work to get this working. */
2767 label = gen_label_rtx ();
2768 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2770 /* Compute address to jump to :
2771 label - 5*eax + nnamed_sse_arguments*5 */
2772 tmp_reg = gen_reg_rtx (Pmode);
2773 nsse_reg = gen_reg_rtx (Pmode);
2774 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2775 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2776 gen_rtx_MULT (Pmode, nsse_reg,
2778 if (next_cum.sse_regno)
2781 gen_rtx_CONST (DImode,
2782 gen_rtx_PLUS (DImode,
2784 GEN_INT (next_cum.sse_regno * 4))));
2786 emit_move_insn (nsse_reg, label_ref);
2787 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2789 /* Compute address of memory block we save into. We always use pointer
2790 pointing 127 bytes after first byte to store - this is needed to keep
2791 instruction size limited by 4 bytes. */
2792 tmp_reg = gen_reg_rtx (Pmode);
2793 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2794 plus_constant (save_area,
2795 8 * REGPARM_MAX + 127)));
2796 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2797 set_mem_alias_set (mem, set);
2798 set_mem_align (mem, BITS_PER_WORD);
2800 /* And finally do the dirty job! */
2801 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2802 GEN_INT (next_cum.sse_regno), label));
/* NOTE(review): head lines (return type, parameter declarations,
   TARGET_64BIT test) were lost in extraction; code kept byte-identical.
   Initializes the four __va_list_tag fields: gp_offset, fp_offset,
   overflow_arg_area and reg_save_area.  */
2807 /* Implement va_start. */
2810 ix86_va_start (valist, nextarg)
2814 HOST_WIDE_INT words, n_gpr, n_fpr;
2815 tree f_gpr, f_fpr, f_ovf, f_sav;
2816 tree gpr, fpr, ovf, sav, t;
2818 /* Only 64bit target needs something special. */
2821 std_expand_builtin_va_start (valist, nextarg);
2825 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2826 f_fpr = TREE_CHAIN (f_gpr);
2827 f_ovf = TREE_CHAIN (f_fpr);
2828 f_sav = TREE_CHAIN (f_ovf);
2830 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2831 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2832 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2833 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2834 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2836 /* Count number of gp and fp argument registers used. */
2837 words = current_function_args_info.words;
2838 n_gpr = current_function_args_info.regno;
2839 n_fpr = current_function_args_info.sse_regno;
2841 if (TARGET_DEBUG_ARG)
2842 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2843 (int) words, (int) n_gpr, (int) n_fpr);
/* gp_offset = bytes of integer registers already consumed.  */
2845 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2846 build_int_2 (n_gpr * 8, 0));
2847 TREE_SIDE_EFFECTS (t) = 1;
2848 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
/* fp_offset = SSE save slots start after the 8*REGPARM_MAX int area.  */
2850 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2851 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2852 TREE_SIDE_EFFECTS (t) = 1;
2853 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2855 /* Find the overflow area. */
2856 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2858 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2859 build_int_2 (words * UNITS_PER_WORD, 0));
2860 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2861 TREE_SIDE_EFFECTS (t) = 1;
2862 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2864 /* Find the register save area.
2865 Prologue of the function save it right above stack frame. */
2866 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2867 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2868 TREE_SIDE_EFFECTS (t) = 1;
2869 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
/* NOTE(review): numerous interior lines (declarations, braces, branch
   headers) were lost in extraction; code kept byte-identical.  Overall
   flow: classify the type via construct_container, emit a register-path
   that pulls the value from the va_list save area (with a bounds check
   jumping to lab_false), and a stack-path that reads from the aligned
   overflow area.  */
2872 /* Implement va_arg. */
2874 ix86_va_arg (valist, type)
2877 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2878 tree f_gpr, f_fpr, f_ovf, f_sav;
2879 tree gpr, fpr, ovf, sav, t;
2881 rtx lab_false, lab_over = NULL_RTX;
2886 /* Only 64bit target needs something special. */
2889 return std_expand_builtin_va_arg (valist, type);
2892 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2893 f_fpr = TREE_CHAIN (f_gpr);
2894 f_ovf = TREE_CHAIN (f_fpr);
2895 f_sav = TREE_CHAIN (f_ovf);
2897 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2898 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2899 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2900 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2901 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2903 size = int_size_in_bytes (type);
2906 /* Passed by reference. */
2908 type = build_pointer_type (type);
2909 size = int_size_in_bytes (type);
2911 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2913 container = construct_container (TYPE_MODE (type), type, 0,
2914 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2916 * Pull the value out of the saved registers ...
2919 addr_rtx = gen_reg_rtx (Pmode);
2923 rtx int_addr_rtx, sse_addr_rtx;
2924 int needed_intregs, needed_sseregs;
2927 lab_over = gen_label_rtx ();
2928 lab_false = gen_label_rtx ();
2930 examine_argument (TYPE_MODE (type), type, 0,
2931 &needed_intregs, &needed_sseregs);
2934 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2935 || TYPE_ALIGN (type) > 128);
2937 /* In case we are passing structure, verify that it is consecutive block
2938 on the register save area. If not we need to do moves. */
2939 if (!need_temp && !REG_P (container))
2941 /* Verify that all registers are strictly consecutive */
2942 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2946 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2948 rtx slot = XVECEXP (container, 0, i);
2949 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
2950 || INTVAL (XEXP (slot, 1)) != i * 16)
2958 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2960 rtx slot = XVECEXP (container, 0, i);
2961 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
2962 || INTVAL (XEXP (slot, 1)) != i * 8)
2969 int_addr_rtx = addr_rtx;
2970 sse_addr_rtx = addr_rtx;
2974 int_addr_rtx = gen_reg_rtx (Pmode);
2975 sse_addr_rtx = gen_reg_rtx (Pmode);
2977 /* First ensure that we fit completely in registers. */
2980 emit_cmp_and_jump_insns (expand_expr
2981 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2982 GEN_INT ((REGPARM_MAX - needed_intregs +
2983 1) * 8), GE, const1_rtx, SImode,
2988 emit_cmp_and_jump_insns (expand_expr
2989 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2990 GEN_INT ((SSE_REGPARM_MAX -
2991 needed_sseregs + 1) * 16 +
2992 REGPARM_MAX * 8), GE, const1_rtx,
2993 SImode, 1, lab_false);
2996 /* Compute index to start of area used for integer regs. */
2999 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
3000 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
3001 if (r != int_addr_rtx)
3002 emit_move_insn (int_addr_rtx, r);
3006 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
3007 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
3008 if (r != sse_addr_rtx)
3009 emit_move_insn (sse_addr_rtx, r);
3016 /* Never use the memory itself, as it has the alias set. */
3017 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
3018 mem = gen_rtx_MEM (BLKmode, addr_rtx);
3019 set_mem_alias_set (mem, get_varargs_alias_set ());
3020 set_mem_align (mem, BITS_PER_UNIT);
/* Copy each register chunk from the save area into the temporary.  */
3022 for (i = 0; i < XVECLEN (container, 0); i++)
3024 rtx slot = XVECEXP (container, 0, i);
3025 rtx reg = XEXP (slot, 0);
3026 enum machine_mode mode = GET_MODE (reg);
3032 if (SSE_REGNO_P (REGNO (reg)))
3034 src_addr = sse_addr_rtx;
3035 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3039 src_addr = int_addr_rtx;
3040 src_offset = REGNO (reg) * 8;
3042 src_mem = gen_rtx_MEM (mode, src_addr);
3043 set_mem_alias_set (src_mem, get_varargs_alias_set ());
3044 src_mem = adjust_address (src_mem, mode, src_offset);
3045 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
3046 emit_move_insn (dest_mem, src_mem);
3053 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3054 build_int_2 (needed_intregs * 8, 0));
3055 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3056 TREE_SIDE_EFFECTS (t) = 1;
3057 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3062 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3063 build_int_2 (needed_sseregs * 16, 0));
3064 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3065 TREE_SIDE_EFFECTS (t) = 1;
3066 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3069 emit_jump_insn (gen_jump (lab_over));
3071 emit_label (lab_false);
3074 /* ... otherwise out of the overflow area. */
3076 /* Care for on-stack alignment if needed. */
3077 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3081 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3082 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3083 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3087 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3089 emit_move_insn (addr_rtx, r);
3092 build (PLUS_EXPR, TREE_TYPE (t), t,
3093 build_int_2 (rsize * UNITS_PER_WORD, 0));
3094 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3095 TREE_SIDE_EFFECTS (t) = 1;
3096 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3099 emit_label (lab_over);
/* Pass-by-reference values need one more dereference.  */
3103 r = gen_rtx_MEM (Pmode, addr_rtx);
3104 set_mem_alias_set (r, get_varargs_alias_set ());
3105 emit_move_insn (addr_rtx, r);
3111 /* Return nonzero if OP is either a i387 or SSE fp register. */
3113 any_fp_register_operand (op, mode)
3115 enum machine_mode mode ATTRIBUTE_UNUSED;
3117 return ANY_FP_REG_P (op);
3120 /* Return nonzero if OP is an i387 fp register. */
3122 fp_register_operand (op, mode)
3124 enum machine_mode mode ATTRIBUTE_UNUSED;
3126 return FP_REG_P (op);
3129 /* Return nonzero if OP is a non-fp register_operand. */
3131 register_and_not_any_fp_reg_operand (op, mode)
3133 enum machine_mode mode;
3135 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3138 /* Return nonzero if OP is a register operand other than an
3139 i387 fp register. */
3141 register_and_not_fp_reg_operand (op, mode)
3143 enum machine_mode mode;
3145 return register_operand (op, mode) && !FP_REG_P (op);
3148 /* Return nonzero if OP is general operand representable on x86_64. */
3151 x86_64_general_operand (op, mode)
3153 enum machine_mode mode;
3156 return general_operand (op, mode);
3157 if (nonimmediate_operand (op, mode))
3159 return x86_64_sign_extended_value (op);
3162 /* Return nonzero if OP is general operand representable on x86_64
3163 as either sign extended or zero extended constant. */
3166 x86_64_szext_general_operand (op, mode)
3168 enum machine_mode mode;
3171 return general_operand (op, mode);
3172 if (nonimmediate_operand (op, mode))
3174 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3177 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3180 x86_64_nonmemory_operand (op, mode)
3182 enum machine_mode mode;
3185 return nonmemory_operand (op, mode);
3186 if (register_operand (op, mode))
3188 return x86_64_sign_extended_value (op);
3191 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
3194 x86_64_movabs_operand (op, mode)
3196 enum machine_mode mode;
3198 if (!TARGET_64BIT || !flag_pic)
3199 return nonmemory_operand (op, mode);
3200 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3202 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3207 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3210 x86_64_szext_nonmemory_operand (op, mode)
3212 enum machine_mode mode;
3215 return nonmemory_operand (op, mode);
3216 if (register_operand (op, mode))
3218 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3221 /* Return nonzero if OP is immediate operand representable on x86_64. */
3224 x86_64_immediate_operand (op, mode)
3226 enum machine_mode mode;
3229 return immediate_operand (op, mode);
3230 return x86_64_sign_extended_value (op);
3233 /* Return nonzero if OP is immediate operand representable on x86_64. */
3236 x86_64_zext_immediate_operand (op, mode)
3238 enum machine_mode mode ATTRIBUTE_UNUSED;
3240 return x86_64_zero_extended_value (op);
3243 /* Return nonzero if OP is (const_int 1), else return zero. */
3246 const_int_1_operand (op, mode)
3248 enum machine_mode mode ATTRIBUTE_UNUSED;
3250 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
3253 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3254 for shift & compare patterns, as shifting by 0 does not change flags),
3255 else return zero. */
3258 const_int_1_31_operand (op, mode)
3260 enum machine_mode mode ATTRIBUTE_UNUSED;
3262 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3265 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3266 reference and a constant. */
3269 symbolic_operand (op, mode)
3271 enum machine_mode mode ATTRIBUTE_UNUSED;
3273 switch (GET_CODE (op))
3281 if (GET_CODE (op) == SYMBOL_REF
3282 || GET_CODE (op) == LABEL_REF
3283 || (GET_CODE (op) == UNSPEC
3284 && (XINT (op, 1) == UNSPEC_GOT
3285 || XINT (op, 1) == UNSPEC_GOTOFF
3286 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3288 if (GET_CODE (op) != PLUS
3289 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3293 if (GET_CODE (op) == SYMBOL_REF
3294 || GET_CODE (op) == LABEL_REF)
3296 /* Only @GOTOFF gets offsets. */
3297 if (GET_CODE (op) != UNSPEC
3298 || XINT (op, 1) != UNSPEC_GOTOFF)
3301 op = XVECEXP (op, 0, 0);
3302 if (GET_CODE (op) == SYMBOL_REF
3303 || GET_CODE (op) == LABEL_REF)
3312 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3315 pic_symbolic_operand (op, mode)
3317 enum machine_mode mode ATTRIBUTE_UNUSED;
3319 if (GET_CODE (op) != CONST)
3324 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
3329 if (GET_CODE (op) == UNSPEC)
3331 if (GET_CODE (op) != PLUS
3332 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3335 if (GET_CODE (op) == UNSPEC)
3341 /* Return true if OP is a symbolic operand that resolves locally. */
3344 local_symbolic_operand (op, mode)
3346 enum machine_mode mode ATTRIBUTE_UNUSED;
3348 if (GET_CODE (op) == CONST
3349 && GET_CODE (XEXP (op, 0)) == PLUS
3350 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3351 op = XEXP (XEXP (op, 0), 0);
3353 if (GET_CODE (op) == LABEL_REF)
3356 if (GET_CODE (op) != SYMBOL_REF)
3359 /* These we've been told are local by varasm and encode_section_info
3361 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
3364 /* There is, however, a not insubstantial body of code in the rest of
3365 the compiler that assumes it can just stick the results of
3366 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3367 /* ??? This is a hack. Should update the body of the compiler to
3368 always create a DECL an invoke targetm.encode_section_info. */
3369 if (strncmp (XSTR (op, 0), internal_label_prefix,
3370 internal_label_prefix_len) == 0)
3376 /* Test for various thread-local symbols. See ix86_encode_section_info. */
3379 tls_symbolic_operand (op, mode)
3381 enum machine_mode mode ATTRIBUTE_UNUSED;
3383 const char *symbol_str;
3385 if (GET_CODE (op) != SYMBOL_REF)
3387 symbol_str = XSTR (op, 0);
3389 if (symbol_str[0] != '%')
3391 return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
3395 tls_symbolic_operand_1 (op, kind)
3397 enum tls_model kind;
3399 const char *symbol_str;
3401 if (GET_CODE (op) != SYMBOL_REF)
3403 symbol_str = XSTR (op, 0);
3405 return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
3409 global_dynamic_symbolic_operand (op, mode)
3411 enum machine_mode mode ATTRIBUTE_UNUSED;
3413 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3417 local_dynamic_symbolic_operand (op, mode)
3419 enum machine_mode mode ATTRIBUTE_UNUSED;
3421 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3425 initial_exec_symbolic_operand (op, mode)
3427 enum machine_mode mode ATTRIBUTE_UNUSED;
3429 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3433 local_exec_symbolic_operand (op, mode)
3435 enum machine_mode mode ATTRIBUTE_UNUSED;
3437 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3440 /* Test for a valid operand for a call instruction. Don't allow the
3441 arg pointer register or virtual regs since they may decay into
3442 reg + const, which the patterns can't handle. */
3445 call_insn_operand (op, mode)
3447 enum machine_mode mode ATTRIBUTE_UNUSED;
3449 /* Disallow indirect through a virtual register. This leads to
3450 compiler aborts when trying to eliminate them. */
3451 if (GET_CODE (op) == REG
3452 && (op == arg_pointer_rtx
3453 || op == frame_pointer_rtx
3454 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3455 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3458 /* Disallow `call 1234'. Due to varying assembler lameness this
3459 gets either rejected or translated to `call .+1234'. */
3460 if (GET_CODE (op) == CONST_INT)
3463 /* Explicitly allow SYMBOL_REF even if pic. */
3464 if (GET_CODE (op) == SYMBOL_REF)
3467 /* Otherwise we can allow any general_operand in the address. */
3468 return general_operand (op, Pmode);
3471 /* Test for a valid operand for a call instruction. Don't allow the
3472 arg pointer register or virtual regs since they may decay into
3473 reg + const, which the patterns can't handle. */
3476 sibcall_insn_operand (op, mode)
3478 enum machine_mode mode ATTRIBUTE_UNUSED;
3480 /* Disallow indirect through a virtual register. This leads to
3481 compiler aborts when trying to eliminate them. */
3482 if (GET_CODE (op) == REG
3483 && (op == arg_pointer_rtx
3484 || op == frame_pointer_rtx
3485 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3486 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3489 /* Explicitly allow SYMBOL_REF even if pic. */
3490 if (GET_CODE (op) == SYMBOL_REF)
3493 /* Otherwise we can only allow register operands. */
3494 return register_operand (op, Pmode);
3498 constant_call_address_operand (op, mode)
3500 enum machine_mode mode ATTRIBUTE_UNUSED;
3502 if (GET_CODE (op) == CONST
3503 && GET_CODE (XEXP (op, 0)) == PLUS
3504 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3505 op = XEXP (XEXP (op, 0), 0);
3506 return GET_CODE (op) == SYMBOL_REF;
3509 /* Match exactly zero and one. */
3512 const0_operand (op, mode)
3514 enum machine_mode mode;
3516 return op == CONST0_RTX (mode);
3520 const1_operand (op, mode)
3522 enum machine_mode mode ATTRIBUTE_UNUSED;
3524 return op == const1_rtx;
3527 /* Match 2, 4, or 8. Used for leal multiplicands. */
3530 const248_operand (op, mode)
3532 enum machine_mode mode ATTRIBUTE_UNUSED;
3534 return (GET_CODE (op) == CONST_INT
3535 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3538 /* True if this is a constant appropriate for an increment or decrement. */
3541 incdec_operand (op, mode)
3543 enum machine_mode mode ATTRIBUTE_UNUSED;
3545 /* On Pentium4, the inc and dec operations causes extra dependency on flag
3546 registers, since carry flag is not set. */
3547 if (TARGET_PENTIUM4 && !optimize_size)
3549 return op == const1_rtx || op == constm1_rtx;
3552 /* Return nonzero if OP is acceptable as operand of DImode shift
3556 shiftdi_operand (op, mode)
3558 enum machine_mode mode ATTRIBUTE_UNUSED;
3561 return nonimmediate_operand (op, mode);
3563 return register_operand (op, mode);
3566 /* Return false if this is the stack pointer, or any other fake
3567 register eliminable to the stack pointer. Otherwise, this is
3570 This is used to prevent esp from being used as an index reg.
3571 Which would only happen in pathological cases. */
3574 reg_no_sp_operand (op, mode)
3576 enum machine_mode mode;
3579 if (GET_CODE (t) == SUBREG)
3581 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3584 return register_operand (op, mode);
3588 mmx_reg_operand (op, mode)
3590 enum machine_mode mode ATTRIBUTE_UNUSED;
3592 return MMX_REG_P (op);
3595 /* Return false if this is any eliminable register. Otherwise
3599 general_no_elim_operand (op, mode)
3601 enum machine_mode mode;
3604 if (GET_CODE (t) == SUBREG)
3606 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3607 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3608 || t == virtual_stack_dynamic_rtx)
3611 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3612 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3615 return general_operand (op, mode);
3618 /* Return false if this is any eliminable register. Otherwise
3619 register_operand or const_int. */
3622 nonmemory_no_elim_operand (op, mode)
3624 enum machine_mode mode;
3627 if (GET_CODE (t) == SUBREG)
3629 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3630 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3631 || t == virtual_stack_dynamic_rtx)
3634 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3637 /* Return false if this is any eliminable register or stack register,
3638 otherwise work like register_operand. */
3641 index_register_operand (op, mode)
3643 enum machine_mode mode;
3646 if (GET_CODE (t) == SUBREG)
3650 if (t == arg_pointer_rtx
3651 || t == frame_pointer_rtx
3652 || t == virtual_incoming_args_rtx
3653 || t == virtual_stack_vars_rtx
3654 || t == virtual_stack_dynamic_rtx
3655 || REGNO (t) == STACK_POINTER_REGNUM)
3658 return general_operand (op, mode);
3661 /* Return true if op is a Q_REGS class register. */
3664 q_regs_operand (op, mode)
3666 enum machine_mode mode;
3668 if (mode != VOIDmode && GET_MODE (op) != mode)
3670 if (GET_CODE (op) == SUBREG)
3671 op = SUBREG_REG (op);
3672 return ANY_QI_REG_P (op);
3675 /* Return true if op is an flags register. */
3678 flags_reg_operand (op, mode)
3680 enum machine_mode mode;
3682 if (mode != VOIDmode && GET_MODE (op) != mode)
3684 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3687 /* Return true if op is a NON_Q_REGS class register. */
3690 non_q_regs_operand (op, mode)
3692 enum machine_mode mode;
3694 if (mode != VOIDmode && GET_MODE (op) != mode)
3696 if (GET_CODE (op) == SUBREG)
3697 op = SUBREG_REG (op);
3698 return NON_QI_REG_P (op);
3702 zero_extended_scalar_load_operand (op, mode)
3704 enum machine_mode mode ATTRIBUTE_UNUSED;
3707 if (GET_CODE (op) != MEM)
3709 op = maybe_get_pool_constant (op);
3712 if (GET_CODE (op) != CONST_VECTOR)
3715 (GET_MODE_SIZE (GET_MODE (op)) /
3716 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
3717 for (n_elts--; n_elts > 0; n_elts--)
3719 rtx elt = CONST_VECTOR_ELT (op, n_elts);
3720 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
3726 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3729 sse_comparison_operator (op, mode)
3731 enum machine_mode mode ATTRIBUTE_UNUSED;
3733 enum rtx_code code = GET_CODE (op);
3736 /* Operations supported directly. */
3746 /* These are equivalent to ones above in non-IEEE comparisons. */
3753 return !TARGET_IEEE_FP;
3758 /* Return 1 if OP is a valid comparison operator in valid mode. */
3760 ix86_comparison_operator (op, mode)
3762 enum machine_mode mode;
3764 enum machine_mode inmode;
3765 enum rtx_code code = GET_CODE (op);
3766 if (mode != VOIDmode && GET_MODE (op) != mode)
3768 if (GET_RTX_CLASS (code) != '<')
3770 inmode = GET_MODE (XEXP (op, 0));
3772 if (inmode == CCFPmode || inmode == CCFPUmode)
3774 enum rtx_code second_code, bypass_code;
3775 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3776 return (bypass_code == NIL && second_code == NIL);
3783 if (inmode == CCmode || inmode == CCGCmode
3784 || inmode == CCGOCmode || inmode == CCNOmode)
3787 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3788 if (inmode == CCmode)
3792 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3800 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3803 fcmov_comparison_operator (op, mode)
3805 enum machine_mode mode;
3807 enum machine_mode inmode;
3808 enum rtx_code code = GET_CODE (op);
3809 if (mode != VOIDmode && GET_MODE (op) != mode)
3811 if (GET_RTX_CLASS (code) != '<')
3813 inmode = GET_MODE (XEXP (op, 0));
3814 if (inmode == CCFPmode || inmode == CCFPUmode)
3816 enum rtx_code second_code, bypass_code;
3817 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3818 if (bypass_code != NIL || second_code != NIL)
3820 code = ix86_fp_compare_code_to_integer (code);
3822 /* i387 supports just limited amount of conditional codes. */
3825 case LTU: case GTU: case LEU: case GEU:
3826 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3829 case ORDERED: case UNORDERED:
3837 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3840 promotable_binary_operator (op, mode)
3842 enum machine_mode mode ATTRIBUTE_UNUSED;
3844 switch (GET_CODE (op))
3847 /* Modern CPUs have same latency for HImode and SImode multiply,
3848 but 386 and 486 do HImode multiply faster. */
3849 return ix86_cpu > PROCESSOR_I486;
3861 /* Nearly general operand, but accept any const_double, since we wish
3862 to be able to drop them into memory rather than have them get pulled
3866 cmp_fp_expander_operand (op, mode)
3868 enum machine_mode mode;
3870 if (mode != VOIDmode && mode != GET_MODE (op))
3872 if (GET_CODE (op) == CONST_DOUBLE)
3874 return general_operand (op, mode);
3877 /* Match an SI or HImode register for a zero_extract. */
3880 ext_register_operand (op, mode)
3882 enum machine_mode mode ATTRIBUTE_UNUSED;
3885 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3886 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3889 if (!register_operand (op, VOIDmode))
3892 /* Be careful to accept only registers having upper parts. */
3893 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3894 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3897 /* Return 1 if this is a valid binary floating-point operation.
3898 OP is the expression matched, and MODE is its mode. */
3901 binary_fp_operator (op, mode)
3903 enum machine_mode mode;
3905 if (mode != VOIDmode && mode != GET_MODE (op))
3908 switch (GET_CODE (op))
3914 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
3922 mult_operator (op, mode)
3924 enum machine_mode mode ATTRIBUTE_UNUSED;
3926 return GET_CODE (op) == MULT;
3930 div_operator (op, mode)
3932 enum machine_mode mode ATTRIBUTE_UNUSED;
3934 return GET_CODE (op) == DIV;
3938 arith_or_logical_operator (op, mode)
3940 enum machine_mode mode;
3942 return ((mode == VOIDmode || GET_MODE (op) == mode)
3943 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3944 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3947 /* Returns 1 if OP is memory operand with a displacement. */
3950 memory_displacement_operand (op, mode)
3952 enum machine_mode mode;
3954 struct ix86_address parts;
3956 if (! memory_operand (op, mode))
3959 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3962 return parts.disp != NULL_RTX;
3965 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
3966 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3968 ??? It seems likely that this will only work because cmpsi is an
3969 expander, and no actual insns use this. */
3972 cmpsi_operand (op, mode)
3974 enum machine_mode mode;
3976 if (nonimmediate_operand (op, mode))
3979 if (GET_CODE (op) == AND
3980 && GET_MODE (op) == SImode
3981 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3982 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3983 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3984 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3985 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3986 && GET_CODE (XEXP (op, 1)) == CONST_INT)
3992 /* Returns 1 if OP is memory operand that can not be represented by the
3996 long_memory_operand (op, mode)
3998 enum machine_mode mode;
4000 if (! memory_operand (op, mode))
4003 return memory_address_length (op) != 0;
4006 /* Return nonzero if the rtx is known aligned. */
4009 aligned_operand (op, mode)
4011 enum machine_mode mode;
4013 struct ix86_address parts;
4015 if (!general_operand (op, mode))
4018 /* Registers and immediate operands are always "aligned". */
4019 if (GET_CODE (op) != MEM)
4022 /* Don't even try to do any aligned optimizations with volatiles. */
4023 if (MEM_VOLATILE_P (op))
4028 /* Pushes and pops are only valid on the stack pointer. */
4029 if (GET_CODE (op) == PRE_DEC
4030 || GET_CODE (op) == POST_INC)
4033 /* Decode the address. */
4034 if (! ix86_decompose_address (op, &parts))
4037 if (parts.base && GET_CODE (parts.base) == SUBREG)
4038 parts.base = SUBREG_REG (parts.base);
4039 if (parts.index && GET_CODE (parts.index) == SUBREG)
4040 parts.index = SUBREG_REG (parts.index);
4042 /* Look for some component that isn't known to be aligned. */
4046 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
4051 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
4056 if (GET_CODE (parts.disp) != CONST_INT
4057 || (INTVAL (parts.disp) & 3) != 0)
4061 /* Didn't find one -- this must be an aligned address. */
4065 /* Return true if the constant is something that can be loaded with
4066 a special instruction. Only handle 0.0 and 1.0; others are less
4070 standard_80387_constant_p (x)
4073 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4075 /* Note that on the 80387, other constants, such as pi, that we should support
4076 too. On some machines, these are much slower to load as standard constant,
4077 than to load from doubles in memory. */
4078 if (x == CONST0_RTX (GET_MODE (x)))
4080 if (x == CONST1_RTX (GET_MODE (x)))
4085 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4088 standard_sse_constant_p (x)
4091 if (x == const0_rtx)
4093 return (x == CONST0_RTX (GET_MODE (x)));
4096 /* Returns 1 if OP contains a symbol reference */
4099 symbolic_reference_mentioned_p (op)
4102 register const char *fmt;
4105 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4108 fmt = GET_RTX_FORMAT (GET_CODE (op));
4109 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4115 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4116 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4120 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4127 /* Return 1 if it is appropriate to emit `ret' instructions in the
4128 body of a function. Do this only if the epilogue is simple, needing a
4129 couple of insns. Prior to reloading, we can't tell how many registers
4130 must be saved, so return 0 then. Return 0 if there is no frame
4131 marker to de-allocate.
4133 If NON_SAVING_SETJMP is defined and true, then it is not possible
4134 for the epilogue to be simple, so return 0. This is a special case
4135 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4136 until final, but jump_optimize may need to know sooner if a
4140 ix86_can_use_return_insn_p ()
4142 struct ix86_frame frame;
4144 #ifdef NON_SAVING_SETJMP
4145 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4149 if (! reload_completed || frame_pointer_needed)
4152 /* Don't allow more than 32 pop, since that's all we can do
4153 with one instruction. */
4154 if (current_function_pops_args
4155 && current_function_args_size >= 32768)
4158 ix86_compute_frame_layout (&frame);
4159 return frame.to_allocate == 0 && frame.nregs == 0;
4162 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
4164 x86_64_sign_extended_value (value)
4167 switch (GET_CODE (value))
4169 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
4170 to be at least 32 and this all acceptable constants are
4171 represented as CONST_INT. */
4173 if (HOST_BITS_PER_WIDE_INT == 32)
4177 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4178 return trunc_int_for_mode (val, SImode) == val;
4182 /* For certain code models, the symbolic references are known to fit.
4183 in CM_SMALL_PIC model we know it fits if it is local to the shared
4184 library. Don't count TLS SYMBOL_REFs here, since they should fit
4185 only if inside of UNSPEC handled below. */
4187 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4189 /* For certain code models, the code is near as well. */
4191 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4192 || ix86_cmodel == CM_KERNEL);
4194 /* We also may accept the offsetted memory references in certain special
4197 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4198 switch (XINT (XEXP (value, 0), 1))
4200 case UNSPEC_GOTPCREL:
4202 case UNSPEC_GOTNTPOFF:
4208 if (GET_CODE (XEXP (value, 0)) == PLUS)
4210 rtx op1 = XEXP (XEXP (value, 0), 0);
4211 rtx op2 = XEXP (XEXP (value, 0), 1);
4212 HOST_WIDE_INT offset;
4214 if (ix86_cmodel == CM_LARGE)
4216 if (GET_CODE (op2) != CONST_INT)
4218 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4219 switch (GET_CODE (op1))
4222 /* For CM_SMALL assume that latest object is 16MB before
4223 end of 31bits boundary. We may also accept pretty
4224 large negative constants knowing that all objects are
4225 in the positive half of address space. */
4226 if (ix86_cmodel == CM_SMALL
4227 && offset < 16*1024*1024
4228 && trunc_int_for_mode (offset, SImode) == offset)
4230 /* For CM_KERNEL we know that all object resist in the
4231 negative half of 32bits address space. We may not
4232 accept negative offsets, since they may be just off
4233 and we may accept pretty large positive ones. */
4234 if (ix86_cmodel == CM_KERNEL
4236 && trunc_int_for_mode (offset, SImode) == offset)
4240 /* These conditions are similar to SYMBOL_REF ones, just the
4241 constraints for code models differ. */
4242 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4243 && offset < 16*1024*1024
4244 && trunc_int_for_mode (offset, SImode) == offset)
4246 if (ix86_cmodel == CM_KERNEL
4248 && trunc_int_for_mode (offset, SImode) == offset)
4252 switch (XINT (op1, 1))
4257 && trunc_int_for_mode (offset, SImode) == offset)
4271 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
4273 x86_64_zero_extended_value (value)
4276 switch (GET_CODE (value))
4279 if (HOST_BITS_PER_WIDE_INT == 32)
4280 return (GET_MODE (value) == VOIDmode
4281 && !CONST_DOUBLE_HIGH (value));
4285 if (HOST_BITS_PER_WIDE_INT == 32)
4286 return INTVAL (value) >= 0;
4288 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4291 /* For certain code models, the symbolic references are known to fit. */
4293 return ix86_cmodel == CM_SMALL;
4295 /* For certain code models, the code is near as well. */
4297 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4299 /* We also may accept the offsetted memory references in certain special
4302 if (GET_CODE (XEXP (value, 0)) == PLUS)
4304 rtx op1 = XEXP (XEXP (value, 0), 0);
4305 rtx op2 = XEXP (XEXP (value, 0), 1);
4307 if (ix86_cmodel == CM_LARGE)
4309 switch (GET_CODE (op1))
4313 /* For small code model we may accept pretty large positive
4314 offsets, since one bit is available for free. Negative
4315 offsets are limited by the size of NULL pointer area
4316 specified by the ABI. */
4317 if (ix86_cmodel == CM_SMALL
4318 && GET_CODE (op2) == CONST_INT
4319 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4320 && (trunc_int_for_mode (INTVAL (op2), SImode)
4323 /* ??? For the kernel, we may accept adjustment of
4324 -0x10000000, since we know that it will just convert
4325 negative address space to positive, but perhaps this
4326 is not worthwhile. */
4329 /* These conditions are similar to SYMBOL_REF ones, just the
4330 constraints for code models differ. */
4331 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4332 && GET_CODE (op2) == CONST_INT
4333 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4334 && (trunc_int_for_mode (INTVAL (op2), SImode)
4348 /* Value should be nonzero if functions must have frame pointers.
4349 Zero means the frame pointer need not be set up (and parms may
4350 be accessed via the stack pointer) in functions that seem suitable. */
4353 ix86_frame_pointer_required ()
4355 /* If we accessed previous frames, then the generated code expects
4356 to be able to access the saved ebp value in our frame. */
4357 if (cfun->machine->accesses_prev_frame)
4360 /* Several x86 os'es need a frame pointer for other reasons,
4361 usually pertaining to setjmp. */
4362 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4365 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4366 the frame pointer by default. Turn it back on now if we've not
4367 got a leaf function. */
4368 if (TARGET_OMIT_LEAF_FRAME_POINTER
4369 && (!current_function_is_leaf))
4372 if (current_function_profile)
4378 /* Record that the current function accesses previous call frames. */
4381 ix86_setup_frame_addresses ()
4383 cfun->machine->accesses_prev_frame = 1;
4386 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4387 # define USE_HIDDEN_LINKONCE 1
4389 # define USE_HIDDEN_LINKONCE 0
4392 static int pic_labels_used;
4394 /* Fills in the label name that should be used for a pc thunk for
4395 the given register. */
4398 get_pc_thunk_name (name, regno)
4402 if (USE_HIDDEN_LINKONCE)
4403 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4405 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4409 /* This function generates code for -fpic that loads %ebx with
4410 the return address of the caller and then returns. */
4413 ix86_asm_file_end (file)
4419 for (regno = 0; regno < 8; ++regno)
4423 if (! ((pic_labels_used >> regno) & 1))
4426 get_pc_thunk_name (name, regno);
4428 if (USE_HIDDEN_LINKONCE)
4432 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4434 TREE_PUBLIC (decl) = 1;
4435 TREE_STATIC (decl) = 1;
4436 DECL_ONE_ONLY (decl) = 1;
4438 (*targetm.asm_out.unique_section) (decl, 0);
4439 named_section (decl, NULL, 0);
4441 (*targetm.asm_out.globalize_label) (file, name);
4442 fputs ("\t.hidden\t", file);
4443 assemble_name (file, name);
4445 ASM_DECLARE_FUNCTION_NAME (file, name, decl);
4450 ASM_OUTPUT_LABEL (file, name);
4453 xops[0] = gen_rtx_REG (SImode, regno);
4454 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4455 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4456 output_asm_insn ("ret", xops);
4460 /* Emit code for the SET_GOT patterns. */
4463 output_set_got (dest)
4469 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4471 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4473 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4476 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4478 output_asm_insn ("call\t%a2", xops);
4481 /* Output the "canonical" label name ("Lxx$pb") here too. This
4482 is what will be referred to by the Mach-O PIC subsystem. */
4483 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4485 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4486 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4489 output_asm_insn ("pop{l}\t%0", xops);
4494 get_pc_thunk_name (name, REGNO (dest));
4495 pic_labels_used |= 1 << REGNO (dest);
4497 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4498 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4499 output_asm_insn ("call\t%X2", xops);
4502 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4503 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4504 else if (!TARGET_MACHO)
4505 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4510 /* Generate an "push" pattern for input ARG. */
4516 return gen_rtx_SET (VOIDmode,
4518 gen_rtx_PRE_DEC (Pmode,
4519 stack_pointer_rtx)),
4523 /* Return >= 0 if there is an unused call-clobbered register available
4524 for the entire function. */
4527 ix86_select_alt_pic_regnum ()
4529 if (current_function_is_leaf && !current_function_profile)
4532 for (i = 2; i >= 0; --i)
4533 if (!regs_ever_live[i])
4537 return INVALID_REGNUM;
4540 /* Return 1 if we need to save REGNO. */
4542 ix86_save_reg (regno, maybe_eh_return)
4544 int maybe_eh_return;
4546 if (pic_offset_table_rtx
4547 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4548 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4549 || current_function_profile
4550 || current_function_calls_eh_return))
4552 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4557 if (current_function_calls_eh_return && maybe_eh_return)
4562 unsigned test = EH_RETURN_DATA_REGNO (i);
4563 if (test == INVALID_REGNUM)
4570 return (regs_ever_live[regno]
4571 && !call_used_regs[regno]
4572 && !fixed_regs[regno]
4573 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4576 /* Return number of registers to be saved on the stack. */
4584 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4585 if (ix86_save_reg (regno, true))
4590 /* Return the offset between two registers, one to be eliminated, and the other
4591 its replacement, at the start of a routine. */
4594 ix86_initial_elimination_offset (from, to)
4598 struct ix86_frame frame;
4599 ix86_compute_frame_layout (&frame);
4601 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4602 return frame.hard_frame_pointer_offset;
4603 else if (from == FRAME_POINTER_REGNUM
4604 && to == HARD_FRAME_POINTER_REGNUM)
4605 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4608 if (to != STACK_POINTER_REGNUM)
4610 else if (from == ARG_POINTER_REGNUM)
4611 return frame.stack_pointer_offset;
4612 else if (from != FRAME_POINTER_REGNUM)
4615 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4619 /* Fill structure ix86_frame about frame of currently computed function. */
4622 ix86_compute_frame_layout (frame)
4623 struct ix86_frame *frame;
4625 HOST_WIDE_INT total_size;
4626 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4628 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4629 HOST_WIDE_INT size = get_frame_size ();
4631 frame->nregs = ix86_nsaved_regs ();
4634 /* Skip return address and saved base pointer. */
4635 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4637 frame->hard_frame_pointer_offset = offset;
4639 /* Do some sanity checking of stack_alignment_needed and
4640 preferred_alignment, since i386 port is the only using those features
4641 that may break easily. */
4643 if (size && !stack_alignment_needed)
4645 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4647 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4649 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4652 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4653 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4655 /* Register save area */
4656 offset += frame->nregs * UNITS_PER_WORD;
4659 if (ix86_save_varrargs_registers)
4661 offset += X86_64_VARARGS_SIZE;
4662 frame->va_arg_size = X86_64_VARARGS_SIZE;
4665 frame->va_arg_size = 0;
4667 /* Align start of frame for local function. */
4668 frame->padding1 = ((offset + stack_alignment_needed - 1)
4669 & -stack_alignment_needed) - offset;
4671 offset += frame->padding1;
4673 /* Frame pointer points here. */
4674 frame->frame_pointer_offset = offset;
4678 /* Add outgoing arguments area. Can be skipped if we eliminated
4679 all the function calls as dead code. */
4680 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4682 offset += current_function_outgoing_args_size;
4683 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4686 frame->outgoing_arguments_size = 0;
4688 /* Align stack boundary. Only needed if we're calling another function
4690 if (!current_function_is_leaf || current_function_calls_alloca)
4691 frame->padding2 = ((offset + preferred_alignment - 1)
4692 & -preferred_alignment) - offset;
4694 frame->padding2 = 0;
4696 offset += frame->padding2;
4698 /* We've reached end of stack frame. */
4699 frame->stack_pointer_offset = offset;
4701 /* Size prologue needs to allocate. */
4702 frame->to_allocate =
4703 (size + frame->padding1 + frame->padding2
4704 + frame->outgoing_arguments_size + frame->va_arg_size);
4706 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4707 && current_function_is_leaf)
4709 frame->red_zone_size = frame->to_allocate;
4710 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4711 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4714 frame->red_zone_size = 0;
4715 frame->to_allocate -= frame->red_zone_size;
4716 frame->stack_pointer_offset -= frame->red_zone_size;
4718 fprintf (stderr, "nregs: %i\n", frame->nregs);
4719 fprintf (stderr, "size: %i\n", size);
4720 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4721 fprintf (stderr, "padding1: %i\n", frame->padding1);
4722 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4723 fprintf (stderr, "padding2: %i\n", frame->padding2);
4724 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4725 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4726 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4727 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4728 frame->hard_frame_pointer_offset);
4729 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4733 /* Emit code to save registers in the prologue. */
/* NOTE(review): this excerpt omits some original lines (return type,
   local declarations, braces); only comments were added here.  */
4736 ix86_emit_save_regs ()
/* Push call-saved registers from the highest hard-register number down,
   so the epilogue can pop them in the opposite order.  */
4741 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4742 if (ix86_save_reg (regno, true))
4744 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
/* Mark frame-related so DWARF unwind/CFI notes are generated.  */
4745 RTX_FRAME_RELATED_P (insn) = 1;
4749 /* Emit code to save registers using MOV insns. First register
4750 is restored from POINTER + OFFSET. */
/* NOTE(review): excerpt omits the return type, the POINTER parameter
   declaration and some braces; comments only were added.  */
4752 ix86_emit_save_regs_using_mov (pointer, offset)
4754 HOST_WIDE_INT offset;
/* Store each live call-saved register at successive word offsets from
   POINTER, in ascending register-number order.  */
4759 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4760 if (ix86_save_reg (regno, true))
4762 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4764 gen_rtx_REG (Pmode, regno));
/* Frame-related so the unwinder knows where the register was saved.  */
4765 RTX_FRAME_RELATED_P (insn) = 1;
4766 offset += UNITS_PER_WORD;
4770 /* Expand the prologue into a bunch of separate insns. */
/* Overall shape: (1) decide between the push-based and the mov-based
   ("fast") register-save sequence, (2) optionally establish the frame
   pointer, (3) allocate the frame (directly, or via _alloca when stack
   probing is required), (4) save registers, (5) set up the PIC register
   if it is live.  NOTE(review): several original lines (declarations,
   braces, else-arms) are absent from this excerpt.  */
4773 ix86_expand_prologue ()
4777 struct ix86_frame frame;
4779 HOST_WIDE_INT allocate;
4781 ix86_compute_frame_layout (&frame);
4784 int count = frame.nregs;
4786 /* The fast prologue uses move instead of push to save registers. This
4787 is significantly longer, but also executes faster as modern hardware
4788 can execute the moves in parallel, but can't do that for push/pop.
4790 Be careful about choosing what prologue to emit: When function takes
4791 many instructions to execute we may use slow version as well as in
4792 case function is known to be outside hot spot (this is known with
4793 feedback only). Weight the size of function by number of registers
4794 to save as it is cheap to use one or two push instructions but very
4795 slow to use many of them. */
4797 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4798 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4799 || (flag_branch_probabilities
4800 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4801 use_fast_prologue_epilogue = 0;
4803 use_fast_prologue_epilogue = !expensive_function_p (count);
4804 if (TARGET_PROLOGUE_USING_MOVE)
4805 use_mov = use_fast_prologue_epilogue;
4808 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4809 slower on all targets. Also sdb doesn't like it. */
/* push %ebp; mov %esp, %ebp — both frame-related for unwind info.  */
4811 if (frame_pointer_needed)
4813 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4814 RTX_FRAME_RELATED_P (insn) = 1;
4816 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4817 RTX_FRAME_RELATED_P (insn) = 1;
4820 allocate = frame.to_allocate;
4821 /* In case we are dealing only with single register and empty frame,
4822 push is equivalent of the mov+add sequence. */
4823 if (allocate == 0 && frame.nregs <= 1)
4827 ix86_emit_save_regs ();
4829 allocate += frame.nregs * UNITS_PER_WORD;
/* Small/unprobed allocation: a single stack-pointer adjustment.  */
4833 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4835 insn = emit_insn (gen_pro_epilogue_adjust_stack
4836 (stack_pointer_rtx, stack_pointer_rtx,
4837 GEN_INT (-allocate)));
4838 RTX_FRAME_RELATED_P (insn) = 1;
/* Large allocation with stack probing: call _alloca with the size in
   %eax so each page is touched.  */
4842 /* ??? Is this only valid for Win32? */
4849 arg0 = gen_rtx_REG (SImode, 0);
4850 emit_move_insn (arg0, GEN_INT (allocate));
4852 sym = gen_rtx_MEM (FUNCTION_MODE,
4853 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
4854 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
4856 CALL_INSN_FUNCTION_USAGE (insn)
4857 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4858 CALL_INSN_FUNCTION_USAGE (insn));
4860 /* Don't allow scheduling pass to move insns across __alloca
4862 emit_insn (gen_blockage (const0_rtx));
/* mov-based save: address off %esp when no frame pointer (or nothing
   allocated), otherwise below %ebp.  */
4866 if (!frame_pointer_needed || !frame.to_allocate)
4867 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4869 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4870 -frame.nregs * UNITS_PER_WORD);
4873 #ifdef SUBTARGET_PROLOGUE
/* Load the PIC register when it is referenced (or when profiling,
   which needs it for the mcount call).  */
4877 pic_reg_used = false;
4878 if (pic_offset_table_rtx
4879 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4880 || current_function_profile))
4882 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4884 if (alt_pic_reg_used != INVALID_REGNUM)
4885 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4887 pic_reg_used = true;
4892 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4894 /* Even with accurate pre-reload life analysis, we can wind up
4895 deleting all references to the pic register after reload.
4896 Consider if cross-jumping unifies two sides of a branch
4897 controlled by a comparison vs the only read from a global.
4898 In which case, allow the set_got to be deleted, though we're
4899 too late to do anything about the ebx save in the prologue. */
4900 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL)
4903 /* Prevent function calls from being scheduled before the call to mcount.
4904 In the pic_reg_used case, make sure that the got load isn't deleted. */
4905 if (current_function_profile)
4906 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
4909 /* Emit code to restore saved registers using MOV insns. First register
4910 is restored from POINTER + OFFSET. */
/* MAYBE_EH_RETURN is forwarded to ix86_save_reg so the eh_return data
   registers are included when restoring along an eh_return path.
   NOTE(review): excerpt omits the POINTER parameter declaration and
   some braces; comments only were added.  */
4912 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4915 int maybe_eh_return;
/* Loads mirror the save loop: ascending register numbers, one word
   per saved register starting at POINTER + OFFSET.  */
4919 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4920 if (ix86_save_reg (regno, maybe_eh_return))
4922 emit_move_insn (gen_rtx_REG (Pmode, regno),
4923 adjust_address (gen_rtx_MEM (Pmode, pointer),
4925 offset += UNITS_PER_WORD;
4929 /* Restore function stack, frame, and registers. */
/* STYLE selects the epilogue variant; the code below tests style == 2
   (eh_return normal-path handling differs) and emits no return insn for
   sibcall epilogues.  NOTE(review): several original lines (parameter
   declaration, braces, else-arms) are absent from this excerpt.  */
4932 ix86_expand_epilogue (style)
4936 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4937 struct ix86_frame frame;
4938 HOST_WIDE_INT offset;
4940 ix86_compute_frame_layout (&frame);
4942 /* Calculate start of saved registers relative to ebp. Special care
4943 must be taken for the normal return case of a function using
4944 eh_return: the eax and edx registers are marked as saved, but not
4945 restored along this path. */
4946 offset = frame.nregs;
4947 if (current_function_calls_eh_return && style != 2)
4949 offset *= -UNITS_PER_WORD;
4951 /* If we're only restoring one register and sp is not valid then
4952 use a move instruction to restore the register since it's
4953 less work than reloading sp and popping the register.
4955 The default code results in stack adjustment using add/lea instruction,
4956 while this code results in LEAVE instruction (or discrete equivalent),
4957 so it is profitable in some other cases as well. Especially when there
4958 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4959 and there is exactly one register to pop. This heuristic may need some
4960 tuning in future. */
4961 if ((!sp_valid && frame.nregs <= 1)
4962 || (TARGET_EPILOGUE_USING_MOVE
4963 && use_fast_prologue_epilogue
4964 && (frame.nregs > 1 || frame.to_allocate))
4965 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
4966 || (frame_pointer_needed && TARGET_USE_LEAVE
4967 && use_fast_prologue_epilogue && frame.nregs == 1)
4968 || current_function_calls_eh_return)
4970 /* Restore registers. We can use ebp or esp to address the memory
4971 locations. If both are available, default to ebp, since offsets
4972 are known to be small. Only exception is esp pointing directly to the
4973 end of block of saved registers, where we may simplify addressing
4976 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
4977 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4978 frame.to_allocate, style == 2);
4980 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4981 offset, style == 2);
4983 /* eh_return epilogues need %ecx added to the stack pointer. */
4986 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
/* With a frame pointer: compute the landing sp into SA, reload the
   saved %ebp, then move sp there.  */
4988 if (frame_pointer_needed)
4990 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4991 tmp = plus_constant (tmp, UNITS_PER_WORD);
4992 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4994 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4995 emit_move_insn (hard_frame_pointer_rtx, tmp);
4997 emit_insn (gen_pro_epilogue_adjust_stack
4998 (stack_pointer_rtx, sa, const0_rtx));
/* Without a frame pointer: add the adjustment plus the frame and
   register-save sizes directly to sp.  */
5002 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5003 tmp = plus_constant (tmp, (frame.to_allocate
5004 + frame.nregs * UNITS_PER_WORD));
5005 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5008 else if (!frame_pointer_needed)
5009 emit_insn (gen_pro_epilogue_adjust_stack
5010 (stack_pointer_rtx, stack_pointer_rtx,
5011 GEN_INT (frame.to_allocate
5012 + frame.nregs * UNITS_PER_WORD)));
5013 /* If not an i386, mov & pop is faster than "leave". */
5014 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
5015 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5018 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
5019 hard_frame_pointer_rtx,
5022 emit_insn (gen_popdi1 (hard_frame_pointer_rtx))
5024 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
/* Pop-based path: deallocate first, then pop each saved register.  */
5029 /* First step is to deallocate the stack frame so that we can
5030 pop the registers. */
5033 if (!frame_pointer_needed)
5035 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
5036 hard_frame_pointer_rtx,
5039 else if (frame.to_allocate)
5040 emit_insn (gen_pro_epilogue_adjust_stack
5041 (stack_pointer_rtx, stack_pointer_rtx,
5042 GEN_INT (frame.to_allocate)));
5044 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5045 if (ix86_save_reg (regno, false))
5048 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5050 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5052 if (frame_pointer_needed)
5054 /* Leave results in shorter dependency chains on CPUs that are
5055 able to grok it fast. */
5056 if (TARGET_USE_LEAVE)
5057 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5058 else if (TARGET_64BIT)
5059 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5061 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5065 /* Sibcall epilogues don't want a return instruction. */
/* ret $N for callee-pop conventions (stdcall/pascal).  */
5069 if (current_function_pops_args && current_function_args_size)
5071 rtx popc = GEN_INT (current_function_pops_args);
5073 /* i386 can only pop 64K bytes. If asked to pop more, pop
5074 return address, do explicit add, and jump indirectly to the
5077 if (current_function_pops_args >= 65536)
5079 rtx ecx = gen_rtx_REG (SImode, 2);
5081 /* There is no "pascal" calling convention in 64bit ABI. */
5085 emit_insn (gen_popsi1 (ecx));
5086 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5087 emit_jump_insn (gen_return_indirect_internal (ecx));
5090 emit_jump_insn (gen_return_pop_internal (popc));
5093 emit_jump_insn (gen_return_internal ());
5096 /* Reset from the function's potential modifications. */
/* The prologue may have renumbered pic_offset_table_rtx to an alternate
   register (see ix86_select_alt_pic_regnum use in ix86_expand_prologue);
   restore the canonical PIC register number for the next function.  */
5099 ix86_output_function_epilogue (file, size)
5100 FILE *file ATTRIBUTE_UNUSED;
5101 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
5103 if (pic_offset_table_rtx)
5104 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5107 /* Extract the parts of an RTL expression that is a valid memory address
5108 for an instruction. Return 0 if the structure of the address is
5109 grossly off. Return -1 if the address contains ASHIFT, so it is not
5110 strictly valid, but still used for computing length of lea instruction.
/* Splits ADDR into base + index*scale + disp, then canonicalizes the
   parts for x86 encoding constraints.  NOTE(review): this excerpt omits
   some original lines (several braces, failure returns and the final
   stores into *out); comments only were added.  */
5114 ix86_decompose_address (addr, out)
5116 struct ix86_address *out;
5118 rtx base = NULL_RTX;
5119 rtx index = NULL_RTX;
5120 rtx disp = NULL_RTX;
5121 HOST_WIDE_INT scale = 1;
5122 rtx scale_rtx = NULL_RTX;
/* Dispatch on the top-level code of ADDR.  */
5125 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
5127 else if (GET_CODE (addr) == PLUS)
5129 rtx op0 = XEXP (addr, 0);
5130 rtx op1 = XEXP (addr, 1);
5131 enum rtx_code code0 = GET_CODE (op0);
5132 enum rtx_code code1 = GET_CODE (op1);
5134 if (code0 == REG || code0 == SUBREG)
5136 if (code1 == REG || code1 == SUBREG)
5137 index = op0, base = op1; /* index + base */
5139 base = op0, disp = op1; /* base + displacement */
5141 else if (code0 == MULT)
5143 index = XEXP (op0, 0);
5144 scale_rtx = XEXP (op0, 1);
5145 if (code1 == REG || code1 == SUBREG)
5146 base = op1; /* index*scale + base */
5148 disp = op1; /* index*scale + disp */
5150 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
5152 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
5153 scale_rtx = XEXP (XEXP (op0, 0), 1);
5154 base = XEXP (op0, 1);
5157 else if (code0 == PLUS)
5159 index = XEXP (op0, 0); /* index + base + disp */
5160 base = XEXP (op0, 1);
5166 else if (GET_CODE (addr) == MULT)
5168 index = XEXP (addr, 0); /* index*scale */
5169 scale_rtx = XEXP (addr, 1);
5171 else if (GET_CODE (addr) == ASHIFT)
5175 /* We're called for lea too, which implements ashift on occasion. */
5176 index = XEXP (addr, 0);
5177 tmp = XEXP (addr, 1);
5178 if (GET_CODE (tmp) != CONST_INT)
/* Shift count becomes the scale; only shifts of 0..3 (scale 1/2/4/8)
   are representable.  */
5180 scale = INTVAL (tmp);
5181 if ((unsigned HOST_WIDE_INT) scale > 3)
5187 disp = addr; /* displacement */
5189 /* Extract the integral value of scale. */
5192 if (GET_CODE (scale_rtx) != CONST_INT)
5194 scale = INTVAL (scale_rtx);
5197 /* Allow arg pointer and stack pointer as index if there is no scaling */
5198 if (base && index && scale == 1
5199 && (index == arg_pointer_rtx || index == frame_pointer_rtx
5200 || index == stack_pointer_rtx))
5207 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5208 if ((base == hard_frame_pointer_rtx
5209 || base == frame_pointer_rtx
5210 || base == arg_pointer_rtx) && !disp)
5213 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5214 Avoid this by transforming to [%esi+0]. */
5215 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
5216 && base && !index && !disp
5218 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5221 /* Special case: encode reg+reg instead of reg*2. */
5222 if (!base && index && scale && scale == 2)
5223 base = index, scale = 1;
5225 /* Special case: scaling cannot be encoded without base or displacement. */
5226 if (!base && !disp && index && scale != 1)
5237 /* Return cost of the memory address x.
5238 For i386, it is better to use a complex address than let gcc copy
5239 the address into a reg and make a new pseudo. But not if the address
5240 requires two regs - that would mean more pseudos with longer
/* Lower cost = more preferable address.  NOTE(review): excerpt omits
   the parameter declaration, the cost accumulator's init/adjustments
   and the final return; comments only were added.  */
5243 ix86_address_cost (x)
5246 struct ix86_address parts;
5249 if (!ix86_decompose_address (x, &parts))
/* Strip SUBREGs so the hard/pseudo register tests below see the
   underlying REG.  */
5252 if (parts.base && GET_CODE (parts.base) == SUBREG)
5253 parts.base = SUBREG_REG (parts.base);
5254 if (parts.index && GET_CODE (parts.index) == SUBREG)
5255 parts.index = SUBREG_REG (parts.index);
5257 /* More complex memory references are better. */
5258 if (parts.disp && parts.disp != const0_rtx)
5261 /* Attempt to minimize number of registers in the address. */
5263 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5265 && (!REG_P (parts.index)
5266 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5270 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5272 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5273 && parts.base != parts.index)
5276 /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
5277 since its predecode logic can't detect the length of instructions
5278 and it degenerates to vector decoded. Increase cost of such
5279 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5280 to split such addresses or even refuse such addresses at all.
5282 Following addressing modes are affected:
5287 The first and last case may be avoidable by explicitly coding the zero in
5288 memory address, but I don't have AMD-K6 machine handy to check this
5292 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5293 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5294 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5300 /* If X is a machine specific address (i.e. a symbol or label being
5301 referenced as a displacement from the GOT implemented using an
5302 UNSPEC), then return the base term. Otherwise return X. */
/* Two paths: the 64-bit GOTPCREL form (CONST wrapping an UNSPEC) and
   the 32-bit pic_reg + GOTOFF form.  NOTE(review): excerpt omits the
   parameter declaration, some braces and the return statements.  */
5305 ix86_find_base_term (x)
/* 64-bit case: unwrap CONST, skip an optional constant offset, then
   require UNSPEC_GOTPCREL and extract its SYMBOL_REF/LABEL_REF.  */
5312 if (GET_CODE (x) != CONST)
5315 if (GET_CODE (term) == PLUS
5316 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5317 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5318 term = XEXP (term, 0);
5319 if (GET_CODE (term) != UNSPEC
5320 || XINT (term, 1) != UNSPEC_GOTPCREL)
5323 term = XVECEXP (term, 0, 0);
5325 if (GET_CODE (term) != SYMBOL_REF
5326 && GET_CODE (term) != LABEL_REF)
/* 32-bit case: expect pic_offset_table_rtx + CONST(UNSPEC_GOTOFF ...)
   and extract the underlying symbol or label.  */
5332 if (GET_CODE (x) != PLUS
5333 || XEXP (x, 0) != pic_offset_table_rtx
5334 || GET_CODE (XEXP (x, 1)) != CONST)
5337 term = XEXP (XEXP (x, 1), 0);
5339 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
5340 term = XEXP (term, 0);
5342 if (GET_CODE (term) != UNSPEC
5343 || XINT (term, 1) != UNSPEC_GOTOFF)
5346 term = XVECEXP (term, 0, 0);
5348 if (GET_CODE (term) != SYMBOL_REF
5349 && GET_CODE (term) != LABEL_REF)
5355 /* Determine if a given RTX is a valid constant. We already know this
5356 satisfies CONSTANT_P. */
/* Rejects TLS symbols and TLS-offset CONSTs; allows only whitelisted
   UNSPECs.  NOTE(review): excerpt omits the parameter declaration, the
   case labels and the return statements for several branches.  */
5359 legitimate_constant_p (x)
5364 switch (GET_CODE (x))
5367 /* TLS symbols are not constant. */
5368 if (tls_symbolic_operand (x, Pmode))
5373 inner = XEXP (x, 0);
5375 /* Offsets of TLS symbols are never valid.
5376 Discourage CSE from creating them. */
5377 if (GET_CODE (inner) == PLUS
5378 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5381 /* Only some unspecs are valid as "constants". */
5382 if (GET_CODE (inner) == UNSPEC)
5383 switch (XINT (inner, 1))
5386 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5396 /* Otherwise we handle everything else in the move patterns. */
5400 /* Determine if it's legal to put X into the constant pool. This
5401 is not possible for the address of thread-local symbols, which
5402 is checked above. */
/* Target hook: true means X must NOT be forced into the constant pool;
   delegates to legitimate_constant_p above.  */
5405 ix86_cannot_force_const_mem (x)
5408 return !legitimate_constant_p (x);
5411 /* Determine if a given RTX is a valid constant address. */
/* NOTE(review): excerpt omits the parameter declaration and most case
   labels of the switch; comments only were added.  */
5414 constant_address_p (x)
5417 switch (GET_CODE (x))
5424 return TARGET_64BIT;
5427 /* For Mach-O, really believe the CONST. */
5430 /* Otherwise fall through. */
/* Non-PIC only: constants are direct addresses when not compiling PIC.  */
5432 return !flag_pic && legitimate_constant_p (x);
5439 /* Nonzero if the constant value X is a legitimate general operand
5440 when generating PIC code. It is given that flag_pic is on and
5441 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
/* NOTE(review): excerpt omits the parameter declaration and some case
   labels/returns; comments only were added.  */
5444 legitimate_pic_operand_p (x)
5449 switch (GET_CODE (x))
5452 inner = XEXP (x, 0);
5454 /* Only some unspecs are valid as "constants". */
5455 if (GET_CODE (inner) == UNSPEC)
5456 switch (XINT (inner, 1))
5459 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
/* Symbols/labels are legitimate only as PIC displacements.  */
5467 return legitimate_pic_address_disp_p (x);
5474 /* Determine if a given CONST RTX is a valid memory displacement
/* Accepts the RIP-relative forms on 64-bit (local symbols/labels with a
   bounded offset, or GOTPCREL) and the GOT-relative UNSPEC forms on
   32-bit.  NOTE(review): excerpt omits the parameter declaration, some
   braces and several return statements.  */
5478 legitimate_pic_address_disp_p (disp)
5483 /* In 64bit mode we can allow direct addresses of symbols and labels
5484 when they are not dynamic symbols. */
5487 /* TLS references should always be enclosed in UNSPEC. */
5488 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5490 if (GET_CODE (disp) == SYMBOL_REF
5491 && ix86_cmodel == CM_SMALL_PIC
5492 && (CONSTANT_POOL_ADDRESS_P (disp)
5493 || SYMBOL_REF_FLAG (disp)))
5495 if (GET_CODE (disp) == LABEL_REF)
/* symbol/label + offset is OK when the offset fits in +/-16MB
   (signed 25-bit range leaves headroom in the small PIC model).  */
5497 if (GET_CODE (disp) == CONST
5498 && GET_CODE (XEXP (disp, 0)) == PLUS
5499 && ((GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
5500 && ix86_cmodel == CM_SMALL_PIC
5501 && (CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (disp, 0), 0))
5502 || SYMBOL_REF_FLAG (XEXP (XEXP (disp, 0), 0))))
5503 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
5504 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT
5505 && INTVAL (XEXP (XEXP (disp, 0), 1)) < 16*1024*1024
5506 && INTVAL (XEXP (XEXP (disp, 0), 1)) >= -16*1024*1024)
5509 if (GET_CODE (disp) != CONST)
5511 disp = XEXP (disp, 0);
5515 /* It is unsafe to allow PLUS expressions. This limit allowed distance
5516 of GOT tables. We should not need these anyway. */
5517 if (GET_CODE (disp) != UNSPEC
5518 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5521 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5522 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
/* 32-bit path from here: strip an optional constant offset, allow the
   Mach-O picbase MINUS form, then whitelist GOT-related UNSPECs.  */
5528 if (GET_CODE (disp) == PLUS)
5530 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5532 disp = XEXP (disp, 0);
5536 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5537 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5539 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5540 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5541 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5543 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5544 if (strstr (sym_name, "$pb") != 0)
5549 if (GET_CODE (disp) != UNSPEC)
5552 switch (XINT (disp, 1))
5557 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5559 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5560 case UNSPEC_GOTTPOFF:
5561 case UNSPEC_GOTNTPOFF:
5562 case UNSPEC_INDNTPOFF:
5565 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5567 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5569 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5575 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5576 memory address for an instruction. The MODE argument is the machine mode
5577 for the MEM expression that wants to use this address.
5579 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
5580 convert common non-canonical forms to canonical form so that they will
/* Validation pipeline: decompose ADDR, then check base register, index
   register, scale and displacement in turn; failures jump to a common
   error report.  NOTE(review): excerpt omits the addr parameter
   declaration, labels, braces and some returns.  */
5584 legitimate_address_p (mode, addr, strict)
5585 enum machine_mode mode;
5589 struct ix86_address parts;
5590 rtx base, index, disp;
5591 HOST_WIDE_INT scale;
5592 const char *reason = NULL;
5593 rtx reason_rtx = NULL_RTX;
5595 if (TARGET_DEBUG_ADDR)
5598 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5599 GET_MODE_NAME (mode), strict);
/* Bare thread-pointer UNSPEC is always a valid address.  */
5603 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
5605 if (TARGET_DEBUG_ADDR)
5606 fprintf (stderr, "Success.\n");
5610 if (ix86_decompose_address (addr, &parts) <= 0)
5612 reason = "decomposition failed";
5617 index = parts.index;
5619 scale = parts.scale;
5621 /* Validate base register.
5623 Don't allow SUBREG's here, it can lead to spill failures when the base
5624 is one word out of a two word structure, which is represented internally
5632 if (GET_CODE (base) == SUBREG)
5633 reg = SUBREG_REG (base);
5637 if (GET_CODE (reg) != REG)
5639 reason = "base is not a register";
5643 if (GET_MODE (base) != Pmode)
5645 reason = "base is not in Pmode";
/* STRICT means reload has finished: only hard registers in the proper
   class are acceptable; non-strict also allows pseudos.  */
5649 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5650 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5652 reason = "base is not valid";
5657 /* Validate index register.
5659 Don't allow SUBREG's here, it can lead to spill failures when the index
5660 is one word out of a two word structure, which is represented internally
5668 if (GET_CODE (index) == SUBREG)
5669 reg = SUBREG_REG (index);
5673 if (GET_CODE (reg) != REG)
5675 reason = "index is not a register";
5679 if (GET_MODE (index) != Pmode)
5681 reason = "index is not in Pmode";
5685 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5686 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5688 reason = "index is not valid";
5693 /* Validate scale factor. */
5696 reason_rtx = GEN_INT (scale);
5699 reason = "scale without index";
/* Hardware encodes only scales 1, 2, 4 and 8.  */
5703 if (scale != 2 && scale != 4 && scale != 8)
5705 reason = "scale is not a valid multiplier";
5710 /* Validate displacement. */
/* Whitelist of UNSPEC displacements (GOT/TLS forms); anything else
   wrapped in CONST+UNSPEC is rejected.  */
5715 if (GET_CODE (disp) == CONST
5716 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5717 switch (XINT (XEXP (disp, 0), 1))
5721 case UNSPEC_GOTPCREL:
5724 goto is_legitimate_pic;
5726 case UNSPEC_GOTTPOFF:
5727 case UNSPEC_GOTNTPOFF:
5728 case UNSPEC_INDNTPOFF:
5734 reason = "invalid address unspec";
5738 else if (flag_pic && (SYMBOLIC_CONST (disp)
5740 && !machopic_operand_p (disp)
5745 if (TARGET_64BIT && (index || base))
5747 /* foo@dtpoff(%rX) is ok. */
5748 if (GET_CODE (disp) != CONST
5749 || GET_CODE (XEXP (disp, 0)) != PLUS
5750 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5751 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5752 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5753 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5755 reason = "non-constant pic memory reference";
5759 else if (! legitimate_pic_address_disp_p (disp))
5761 reason = "displacement is an invalid pic construct";
5765 /* This code used to verify that a symbolic pic displacement
5766 includes the pic_offset_table_rtx register.
5768 While this is good idea, unfortunately these constructs may
5769 be created by "adds using lea" optimization for incorrect
5778 This code is nonsensical, but results in addressing
5779 GOT table with pic_offset_table_rtx base. We can't
5780 just refuse it easily, since it gets matched by
5781 "addsi3" pattern, that later gets split to lea in the
5782 case output register differs from input. While this
5783 can be handled by separate addsi pattern for this case
5784 that never results in lea, this seems to be easier and
5785 correct fix for crash to disable this test. */
5787 else if (!CONSTANT_ADDRESS_P (disp))
5789 reason = "displacement is not constant";
/* 64-bit displacements must fit in a sign-extended 32-bit field.  */
5792 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
5794 reason = "displacement is out of range";
5797 else if (!TARGET_64BIT && GET_CODE (disp) == CONST_DOUBLE)
5799 reason = "displacement is a const_double";
5804 /* Everything looks valid. */
5805 if (TARGET_DEBUG_ADDR)
5806 fprintf (stderr, "Success.\n");
/* Shared failure path: optionally dump the reason and offending rtx.  */
5810 if (TARGET_DEBUG_ADDR)
5812 fprintf (stderr, "Error: %s\n", reason);
5813 debug_rtx (reason_rtx);
5818 /* Return an unique alias set for the GOT. */
/* Lazily created on first call; -1 marks "not yet allocated".
   NOTE(review): excerpt omits the guard test and return statement.  */
5820 static HOST_WIDE_INT
5821 ix86_GOT_alias_set ()
5823 static HOST_WIDE_INT set = -1;
5825 set = new_alias_set ();
5829 /* Return a legitimate reference for ORIG (an address) using the
5830 register REG. If REG is 0, a new pseudo is generated.
5832 There are two types of references that must be handled:
5834 1. Global data references must load the address from the GOT, via
5835 the PIC reg. An insn is emitted to do this load, and the reg is
5838 2. Static data references, constant pool addresses, and code labels
5839 compute the address as an offset from the GOT, whose base is in
5840 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
5841 differentiate them from global data objects. The returned
5842 address is the PIC reg + an unspec constant.
5844 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5845 reg also appears in the address. */
/* NOTE(review): this excerpt omits several original lines (parameter
   declarations, braces, condition lines, the final return); comments
   only were added.  */
5848 legitimize_pic_address (orig, reg)
5858 reg = gen_reg_rtx (Pmode);
5859 /* Use the generic Mach-O PIC machinery. */
5860 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
/* 64-bit: valid PIC displacements need no transformation.  */
5863 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
5865 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
5867 /* This symbol may be referenced via a displacement from the PIC
5868 base address (@GOTOFF). */
5870 if (reload_in_progress)
5871 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5872 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5873 new = gen_rtx_CONST (Pmode, new);
5874 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5878 emit_move_insn (reg, new);
/* Plain SYMBOL_REF: load the address from the GOT.  On 64-bit this is
   a RIP-relative @GOTPCREL load.  */
5882 else if (GET_CODE (addr) == SYMBOL_REF)
5886 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5887 new = gen_rtx_CONST (Pmode, new);
5888 new = gen_rtx_MEM (Pmode, new);
/* GOT entries never change after load time.  */
5889 RTX_UNCHANGING_P (new) = 1;
5890 set_mem_alias_set (new, ix86_GOT_alias_set ());
5893 reg = gen_reg_rtx (Pmode);
5894 /* Use directly gen_movsi, otherwise the address is loaded
5895 into register for CSE. We don't want to CSE this addresses,
5896 instead we CSE addresses from the GOT table, so skip this. */
5897 emit_insn (gen_movsi (reg, new));
5902 /* This symbol must be referenced via a load from the
5903 Global Offset Table (@GOT). */
5905 if (reload_in_progress)
5906 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5907 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5908 new = gen_rtx_CONST (Pmode, new);
5909 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5910 new = gen_rtx_MEM (Pmode, new);
5911 RTX_UNCHANGING_P (new) = 1;
5912 set_mem_alias_set (new, ix86_GOT_alias_set ());
5915 reg = gen_reg_rtx (Pmode);
5916 emit_move_insn (reg, new);
/* Not a bare symbol: peel CONST wrappers and handle symbol+offset.  */
5922 if (GET_CODE (addr) == CONST)
5924 addr = XEXP (addr, 0);
5926 /* We must match stuff we generate before. Assume the only
5927 unspecs that can get here are ours. Not that we could do
5928 anything with them anyway... */
5929 if (GET_CODE (addr) == UNSPEC
5930 || (GET_CODE (addr) == PLUS
5931 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5933 if (GET_CODE (addr) != PLUS)
5936 if (GET_CODE (addr) == PLUS)
5938 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5940 /* Check first to see if this is a constant offset from a @GOTOFF
5941 symbol reference. */
5942 if (local_symbolic_operand (op0, Pmode)
5943 && GET_CODE (op1) == CONST_INT)
5947 if (reload_in_progress)
5948 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5949 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5951 new = gen_rtx_PLUS (Pmode, new, op1);
5952 new = gen_rtx_CONST (Pmode, new);
5953 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5957 emit_move_insn (reg, new);
/* Offsets beyond +/-16MB cannot be folded into the displacement;
   force the offset into a register instead.  */
5963 if (INTVAL (op1) < -16*1024*1024
5964 || INTVAL (op1) >= 16*1024*1024)
5965 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
/* General PLUS: legitimize both operands recursively, then recombine,
   folding a constant part back in where possible.  */
5970 base = legitimize_pic_address (XEXP (addr, 0), reg);
5971 new = legitimize_pic_address (XEXP (addr, 1),
5972 base == reg ? NULL_RTX : reg);
5974 if (GET_CODE (new) == CONST_INT)
5975 new = plus_constant (base, INTVAL (new));
5978 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5980 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5981 new = XEXP (new, 1);
5983 new = gen_rtx_PLUS (Pmode, base, new);
/* Target hook: annotate DECL's SYMBOL_REF so later address legitimation
   knows its binding (SYMBOL_REF_FLAG = binds locally) and, for TLS
   variables, its access model (a '%' + model-letter prefix on the
   symbol name).  NOTE(review): excerpt omits the comment header, braces
   and a few lines of the original.  */
5992 ix86_encode_section_info (decl, first)
5994 int first ATTRIBUTE_UNUSED;
5996 bool local_p = (*targetm.binds_local_p) (decl);
5999 rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
6000 if (GET_CODE (rtl) != MEM)
6002 symbol = XEXP (rtl, 0);
6003 if (GET_CODE (symbol) != SYMBOL_REF)
6006 /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
6007 symbol so that we may access it directly in the GOT. */
6010 SYMBOL_REF_FLAG (symbol) = local_p;
6012 /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
6013 "local dynamic", "initial exec" or "local exec" TLS models
6016 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
6018 const char *symbol_str;
6021 enum tls_model kind = decl_tls_model (decl);
6023 if (TARGET_64BIT && ! flag_pic)
6025 /* x86-64 doesn't allow non-pic code for shared libraries,
6026 so don't generate GD/LD TLS models for non-pic code. */
6029 case TLS_MODEL_GLOBAL_DYNAMIC:
6030 kind = TLS_MODEL_INITIAL_EXEC; break;
6031 case TLS_MODEL_LOCAL_DYNAMIC:
6032 kind = TLS_MODEL_LOCAL_EXEC; break;
6038 symbol_str = XSTR (symbol, 0);
/* Already encoded with the same model letter?  Nothing to do.  */
6040 if (symbol_str[0] == '%')
6042 if (symbol_str[1] == tls_model_chars[kind])
/* Build "%<model-letter><name>" in GC-allocated storage and install
   it as the symbol's name.  */
6046 len = strlen (symbol_str) + 1;
6047 newstr = alloca (len + 2);
6050 newstr[1] = tls_model_chars[kind];
6051 memcpy (newstr + 2, symbol_str, len);
6053 XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
6057 /* Undo the above when printing symbol names. */
/* NOTE(review): the function body is absent from this excerpt; only
   the signature line survives.  Presumably it strips the '%' + model
   prefix added by ix86_encode_section_info — confirm against the full
   source.  */
6060 ix86_strip_name_encoding (str)
6070 /* Load the thread pointer into a register. */
/* Builds a MEM of an UNSPEC_TP address, marks it unchanging and gives
   it the GOT alias set, then forces the load into a fresh register.
   NOTE(review): excerpt omits the declaration lines, braces and the
   return statement.  */
6073 get_thread_pointer ()
6077 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6078 tp = gen_rtx_MEM (Pmode, tp);
/* The thread pointer never changes during execution.  */
6079 RTX_UNCHANGING_P (tp) = 1;
6080 set_mem_alias_set (tp, ix86_GOT_alias_set ());
6081 tp = force_reg (Pmode, tp);
6086 /* Try machine-dependent ways of modifying an illegitimate address
6087 to be legitimate. If we find one, return the new, valid address.
6088 This macro is used in only one place: `memory_address' in explow.c.
6090 OLDX is the address as it was before break_out_memory_refs was called.
6091 In some cases it is useful to look at this to decide what needs to be done.
6093 MODE and WIN are passed so that this macro can use
6094 GO_IF_LEGITIMATE_ADDRESS.
6096 It is always safe for this macro to do nothing. It exists to recognize
6097 opportunities to optimize the output.
6099 For the 80386, we handle X+REG by loading X into a register R and
6100 using R+REG. R will go in a general reg and indexing will be used.
6101 However, if REG is a broken-out memory address or multiplication,
6102 nothing needs to be done because REG can certainly go in a general reg.
6104 When -fpic is used, special handling is needed for symbolic references.
6105 See comments by legitimize_pic_address in i386.c for details. */
6108 legitimize_address (x, oldx, mode)
6110 register rtx oldx ATTRIBUTE_UNUSED;
6111 enum machine_mode mode;
6116 if (TARGET_DEBUG_ADDR)
6118 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6119 GET_MODE_NAME (mode));
/* First: if X is a TLS symbol, expand it per its TLS access model.
   (The switch header on the model value is elided in this listing.)  */
6123 log = tls_symbolic_operand (x, mode);
6126 rtx dest, base, off, pic;
6131 case TLS_MODEL_GLOBAL_DYNAMIC:
6132 dest = gen_reg_rtx (Pmode);
6135 rtx rax = gen_rtx_REG (Pmode, 0), insns;
/* 64-bit GD: __tls_get_addr-style call returning in %rax, wrapped in a
   libcall block so the sequence can be CSEd/moved as a unit.  */
6138 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6139 insns = get_insns ();
6142 emit_libcall_block (insns, dest, rax, x);
6145 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6148 case TLS_MODEL_LOCAL_DYNAMIC:
6149 base = gen_reg_rtx (Pmode);
6152 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6155 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6156 insns = get_insns ();
/* Attach an equivalence note naming ix86_tls_get_addr so the libcall
   block is recognized as the LD base computation.  */
6159 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6160 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6161 emit_libcall_block (insns, base, rax, note);
6164 emit_insn (gen_tls_local_dynamic_base_32 (base));
/* LD result = module base + DTP-relative offset of the symbol.  */
6166 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6167 off = gen_rtx_CONST (Pmode, off);
6169 return gen_rtx_PLUS (Pmode, base, off);
6171 case TLS_MODEL_INITIAL_EXEC:
/* IE: load the TP offset from the GOT.  The unspec TYPE chosen below
   selects the relocation (@gottpoff / @gotntpoff / @indntpoff).  */
6175 type = UNSPEC_GOTNTPOFF;
6179 if (reload_in_progress)
6180 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6181 pic = pic_offset_table_rtx;
6182 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6184 else if (!TARGET_GNU_TLS)
6186 pic = gen_reg_rtx (Pmode);
6187 emit_insn (gen_set_got (pic));
6188 type = UNSPEC_GOTTPOFF;
6193 type = UNSPEC_INDNTPOFF;
6196 base = get_thread_pointer ();
6198 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6199 off = gen_rtx_CONST (Pmode, off);
6201 off = gen_rtx_PLUS (Pmode, pic, off);
6202 off = gen_rtx_MEM (Pmode, off);
/* GOT entries are fixed after startup: invariant load, GOT alias set.  */
6203 RTX_UNCHANGING_P (off) = 1;
6204 set_mem_alias_set (off, ix86_GOT_alias_set ());
6205 dest = gen_reg_rtx (Pmode);
6207 if (TARGET_64BIT || TARGET_GNU_TLS)
6209 emit_move_insn (dest, off);
6210 return gen_rtx_PLUS (Pmode, base, dest);
/* Sun TLS variant subtracts the offset from the thread pointer.  */
6213 emit_insn (gen_subsi3 (dest, base, off));
6216 case TLS_MODEL_LOCAL_EXEC:
6217 base = get_thread_pointer ();
6219 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6220 (TARGET_64BIT || TARGET_GNU_TLS)
6221 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6222 off = gen_rtx_CONST (Pmode, off);
6224 if (TARGET_64BIT || TARGET_GNU_TLS)
6225 return gen_rtx_PLUS (Pmode, base, off);
6228 dest = gen_reg_rtx (Pmode);
6229 emit_insn (gen_subsi3 (dest, base, off));
/* Non-TLS path: PIC symbols go through legitimize_pic_address.  */
6240 if (flag_pic && SYMBOLIC_CONST (x))
6241 return legitimize_pic_address (x, 0);
6243 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6244 if (GET_CODE (x) == ASHIFT
6245 && GET_CODE (XEXP (x, 1)) == CONST_INT
6246 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6249 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6250 GEN_INT (1 << log));
6253 if (GET_CODE (x) == PLUS)
6255 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6257 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6258 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6259 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6262 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6263 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6264 GEN_INT (1 << log));
6267 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6268 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6269 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6272 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6273 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6274 GEN_INT (1 << log));
6277 /* Put multiply first if it isn't already. */
6278 if (GET_CODE (XEXP (x, 1)) == MULT)
6280 rtx tmp = XEXP (x, 0);
6281 XEXP (x, 0) = XEXP (x, 1);
6286 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6287 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6288 created by virtual register instantiation, register elimination, and
6289 similar optimizations. */
6290 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6293 x = gen_rtx_PLUS (Pmode,
6294 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6295 XEXP (XEXP (x, 1), 0)),
6296 XEXP (XEXP (x, 1), 1));
6300 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6301 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6302 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6303 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6304 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6305 && CONSTANT_P (XEXP (x, 1)))
6308 rtx other = NULL_RTX;
/* Fold whichever side carries the CONST_INT into a single constant
   term via plus_constant below.  */
6310 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6312 constant = XEXP (x, 1);
6313 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6315 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6317 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6318 other = XEXP (x, 1);
6326 x = gen_rtx_PLUS (Pmode,
6327 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6328 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6329 plus_constant (other, INTVAL (constant)));
/* After each canonicalization round, stop as soon as the address has
   become legitimate.  */
6333 if (changed && legitimate_address_p (mode, x, FALSE))
6336 if (GET_CODE (XEXP (x, 0)) == MULT)
6339 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6342 if (GET_CODE (XEXP (x, 1)) == MULT)
6345 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6349 && GET_CODE (XEXP (x, 1)) == REG
6350 && GET_CODE (XEXP (x, 0)) == REG)
6353 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6356 x = legitimize_pic_address (x, 0);
6359 if (changed && legitimate_address_p (mode, x, FALSE))
/* Last resort: force one side into a register so the result is a
   simple reg+reg or reg+disp form.  */
6362 if (GET_CODE (XEXP (x, 0)) == REG)
6364 register rtx temp = gen_reg_rtx (Pmode);
6365 register rtx val = force_operand (XEXP (x, 1), temp);
6367 emit_move_insn (temp, val);
6373 else if (GET_CODE (XEXP (x, 1)) == REG)
6375 register rtx temp = gen_reg_rtx (Pmode);
6376 register rtx val = force_operand (XEXP (x, 0), temp);
6378 emit_move_insn (temp, val);
6388 /* Print an integer constant expression in assembler syntax. Addition
6389 and subtraction are the only arithmetic that may appear in these
6390 expressions. FILE is the stdio stream to write to, X is the rtx, and
6391 CODE is the operand print code from the output string. */
6394 output_pic_addr_const (file, x, code)
/* Recursive dispatch on the rtx code of X; each case prints its piece
   in the current assembler dialect.  */
6401 switch (GET_CODE (x))
6411 assemble_name (file, XSTR (x, 0));
/* 'P' requests a PLT reference for symbols not known to be local.  */
6412 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_FLAG (x))
6413 fputs ("@PLT", file);
6420 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6421 assemble_name (asm_out_file, buf);
6425 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6429 /* This used to output parentheses around the expression,
6430 but that does not work on the 386 (either ATT or BSD assembler). */
6431 output_pic_addr_const (file, XEXP (x, 0), code);
6435 if (GET_MODE (x) == VOIDmode)
6437 /* We can use %d if the number is <32 bits and positive. */
6438 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6439 fprintf (file, "0x%lx%08lx",
6440 (unsigned long) CONST_DOUBLE_HIGH (x),
6441 (unsigned long) CONST_DOUBLE_LOW (x));
6443 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6446 /* We can't handle floating point constants;
6447 PRINT_OPERAND must handle them. */
6448 output_operand_lossage ("floating constant misused");
6452 /* Some assemblers need integer constants to appear first. */
6453 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6455 output_pic_addr_const (file, XEXP (x, 0), code);
6457 output_pic_addr_const (file, XEXP (x, 1), code);
6459 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6461 output_pic_addr_const (file, XEXP (x, 1), code);
6463 output_pic_addr_const (file, XEXP (x, 0), code);
/* MINUS: brackets differ between Intel and AT&T dialects.  */
6471 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6472 output_pic_addr_const (file, XEXP (x, 0), code);
6474 output_pic_addr_const (file, XEXP (x, 1), code);
6476 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
/* UNSPEC: print the wrapped operand followed by the matching
   relocation suffix for the unspec kind.  */
6480 if (XVECLEN (x, 0) != 1)
6482 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6483 switch (XINT (x, 1))
6486 fputs ("@GOT", file);
6489 fputs ("@GOTOFF", file);
6491 case UNSPEC_GOTPCREL:
6492 fputs ("@GOTPCREL(%rip)", file);
6494 case UNSPEC_GOTTPOFF:
6495 /* FIXME: This might be @TPOFF in Sun ld too. */
6496 fputs ("@GOTTPOFF", file);
6499 fputs ("@TPOFF", file);
6503 fputs ("@TPOFF", file);
6505 fputs ("@NTPOFF", file);
6508 fputs ("@DTPOFF", file);
6510 case UNSPEC_GOTNTPOFF:
6512 fputs ("@GOTTPOFF(%rip)", file);
6514 fputs ("@GOTNTPOFF", file);
6516 case UNSPEC_INDNTPOFF:
6517 fputs ("@INDNTPOFF", file);
6520 output_operand_lossage ("invalid UNSPEC as operand");
6526 output_operand_lossage ("invalid expression as operand");
6530 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6531 We need to handle our special PIC relocations. */
6534 i386_dwarf_output_addr_const (file, x)
/* Emit a pointer-sized directive (.quad on 64-bit, .long otherwise),
   then the constant itself -- through output_pic_addr_const when PIC
   relocations may be involved, plain output_addr_const otherwise.  */
6539 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6543 fprintf (file, "%s", ASM_LONG);
6546 output_pic_addr_const (file, x, '\0');
6548 output_addr_const (file, x);
6552 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6553 We need to emit DTP-relative relocations. */
6556 i386_output_dwarf_dtprel (file, size, x)
/* Emits ".long <x>@DTPOFF" (padded with ", 0" for the 8-byte case,
   per the trailing branch visible below).  SIZE selects the width;
   intermediate size handling is elided in this listing.  */
6561 fputs (ASM_LONG, file);
6562 output_addr_const (file, x);
6563 fputs ("@DTPOFF", file);
6569 fputs (", 0", file);
6576 /* In the name of slightly smaller debug output, and to cater to
6577 general assembler losage, recognize PIC+GOTOFF and turn it back
6578 into a direct symbol reference. */
6581 i386_simplify_dwarf_addr (orig_x)
/* Strip a MEM wrapper first, then pattern-match the PIC address forms
   built by legitimize_pic_address and return the underlying symbol.  */
6586 if (GET_CODE (x) == MEM)
/* 64-bit case: (mem (const (unspec [sym] GOTPCREL))) -> sym.  */
6591 if (GET_CODE (x) != CONST
6592 || GET_CODE (XEXP (x, 0)) != UNSPEC
6593 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6594 || GET_CODE (orig_x) != MEM)
6596 return XVECEXP (XEXP (x, 0), 0, 0);
/* 32-bit case: expect (plus <pic-base-form> (const ...)).  */
6599 if (GET_CODE (x) != PLUS
6600 || GET_CODE (XEXP (x, 1)) != CONST)
6603 if (GET_CODE (XEXP (x, 0)) == REG
6604 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6605 /* %ebx + GOT/GOTOFF */
6607 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6609 /* %ebx + %reg * scale + GOT/GOTOFF */
6611 if (GET_CODE (XEXP (y, 0)) == REG
6612 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6614 else if (GET_CODE (XEXP (y, 1)) == REG
6615 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6619 if (GET_CODE (y) != REG
6620 && GET_CODE (y) != MULT
6621 && GET_CODE (y) != ASHIFT)
/* GOT references only make sense through a MEM; GOTOFF only
   outside one -- hence the orig_x MEM checks below.  */
6627 x = XEXP (XEXP (x, 1), 0);
6628 if (GET_CODE (x) == UNSPEC
6629 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6630 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6633 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6634 return XVECEXP (x, 0, 0);
/* Same, with an additional integer addend inside the CONST.  */
6637 if (GET_CODE (x) == PLUS
6638 && GET_CODE (XEXP (x, 0)) == UNSPEC
6639 && GET_CODE (XEXP (x, 1)) == CONST_INT
6640 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6641 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6642 && GET_CODE (orig_x) != MEM)))
6644 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6646 return gen_rtx_PLUS (Pmode, y, x);
/* Print the condition-code suffix (e.g. "e", "a", "ge") for CODE in
   comparison mode MODE to FILE.  REVERSE inverts the condition; FP
   selects the FP-friendly spelling on some assemblers.  */
6654 put_condition_code (code, mode, reverse, fp, file)
6656 enum machine_mode mode;
/* FP compares: reduce the fancy comparison to a plain integer
   condition first; combinations needing bypass/second codes must have
   been split by the caller (abort path elided here).  */
6662 if (mode == CCFPmode || mode == CCFPUmode)
6664 enum rtx_code second_code, bypass_code;
6665 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6666 if (bypass_code != NIL || second_code != NIL)
6668 code = ix86_fp_compare_code_to_integer (code);
6672 code = reverse_condition (code);
6683 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6688 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
6689 Those same assemblers have the same but opposite losage on cmov. */
6692 suffix = fp ? "nbe" : "a";
6695 if (mode == CCNOmode || mode == CCGOCmode)
6697 else if (mode == CCmode || mode == CCGCmode)
6708 if (mode == CCNOmode || mode == CCGOCmode)
6710 else if (mode == CCmode || mode == CCGCmode)
6719 suffix = fp ? "nb" : "ae";
6722 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6732 suffix = fp ? "u" : "p";
6735 suffix = fp ? "nu" : "np";
6740 fputs (suffix, file);
/* Print the assembler name of register X to FILE, sized/qualified
   according to print-code CODE ('b'/'w'/'k'/'q'/'y'/'h' etc.; see
   print_operand's comment).  */
6744 print_reg (x, code, file)
/* Internal-only registers must never reach the assembler output.  */
6749 if (REGNO (x) == ARG_POINTER_REGNUM
6750 || REGNO (x) == FRAME_POINTER_REGNUM
6751 || REGNO (x) == FLAGS_REG
6752 || REGNO (x) == FPSR_REG)
6755 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
/* Map the print code to a byte width; otherwise fall back to the
   operand's own mode size (line 6771).  */
6758 if (code == 'w' || MMX_REG_P (x))
6760 else if (code == 'b')
6762 else if (code == 'k')
6764 else if (code == 'q')
6766 else if (code == 'y')
6768 else if (code == 'h')
6771 code = GET_MODE_SIZE (GET_MODE (x));
6773 /* Irritatingly, AMD extended registers use different naming convention
6774 from the normal registers. */
6775 if (REX_INT_REG_P (x))
6782 error ("extended registers have no high halves");
6785 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6788 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6791 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6794 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6797 error ("unsupported operand size for extended register");
6805 if (STACK_TOP_P (x))
6807 fputs ("st(0)", file);
/* Non-FP registers get the 'e'/'r' size prefix before the table name
   ("eax" vs "rax"); the tables below give the dword/word/byte names.  */
6814 if (! ANY_FP_REG_P (x))
6815 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6819 fputs (hi_reg_name[REGNO (x)], file);
6822 fputs (qi_reg_name[REGNO (x)], file);
6825 fputs (qi_high_reg_name[REGNO (x)], file);
6832 /* Locate some local-dynamic symbol still in use by this function
6833 so that we can print its name in some tls_local_dynamic_base
/* Returns the cached name when one was already found; otherwise scans
   every insn pattern with for_each_rtx until the callback below caches
   one.  (Failure path is elided in this listing.)  */
6837 get_some_local_dynamic_name ()
6841 if (cfun->machine->some_ld_name)
6842 return cfun->machine->some_ld_name;
6844 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6846 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6847 return cfun->machine->some_ld_name;
/* for_each_rtx callback for get_some_local_dynamic_name: when *PX is a
   local-dynamic TLS SYMBOL_REF, cache its name on cfun and stop the
   walk (nonzero return value elided in this listing).  */
6853 get_some_local_dynamic_name_1 (px, data)
6855 void *data ATTRIBUTE_UNUSED;
6859 if (GET_CODE (x) == SYMBOL_REF
6860 && local_dynamic_symbolic_operand (x, Pmode))
6862 cfun->machine->some_ld_name = XSTR (x, 0);
6870 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6871 C -- print opcode suffix for set/cmov insn.
6872 c -- like C, but print reversed condition
6873 F,f -- likewise, but for floating-point.
6874 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
6876 R -- print the prefix for register names.
6877 z -- print the opcode suffix for the size of the current operand.
6878 * -- print a star (in certain assembler syntax)
6879 A -- print an absolute memory reference.
6880 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6881 s -- print a shift double count, followed by the assemblers argument
6883 b -- print the QImode name of the register for the indicated operand.
6884 %b0 would print %al if operands[0] is reg 0.
6885 w -- likewise, print the HImode name of the register.
6886 k -- likewise, print the SImode name of the register.
6887 q -- likewise, print the DImode name of the register.
6888 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6889 y -- print "st(0)" instead of "st" as a register.
6890 D -- print condition for SSE cmp instruction.
6891 P -- if PIC, print an @PLT suffix.
6892 X -- don't print any sort of PIC '@' suffix for a symbol.
6893 & -- print some in-use local-dynamic symbol name.
/* Main operand printer; dispatches on the %-code first (case labels
   largely elided in this listing), then on the rtx class of X.  */
6897 print_operand (file, x, code)
6907 if (ASSEMBLER_DIALECT == ASM_ATT)
/* '&': emit an in-use local-dynamic symbol name.  */
6912 assemble_name (file, get_some_local_dynamic_name ());
6916 if (ASSEMBLER_DIALECT == ASM_ATT)
6918 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6920 /* Intel syntax. For absolute addresses, registers should not
6921 be surrounded by braces. */
6922 if (GET_CODE (x) != REG)
6925 PRINT_OPERAND (file, x, 0);
6933 PRINT_OPERAND (file, x, 0);
/* Size-suffix codes: AT&T dialect prints the explicit letter; Intel
   dialect needs none.  */
6938 if (ASSEMBLER_DIALECT == ASM_ATT)
6943 if (ASSEMBLER_DIALECT == ASM_ATT)
6948 if (ASSEMBLER_DIALECT == ASM_ATT)
6953 if (ASSEMBLER_DIALECT == ASM_ATT)
6958 if (ASSEMBLER_DIALECT == ASM_ATT)
6963 if (ASSEMBLER_DIALECT == ASM_ATT)
6968 /* 387 opcodes don't get size suffixes if the operands are
6970 if (STACK_REG_P (x))
6973 /* Likewise if using Intel opcodes. */
6974 if (ASSEMBLER_DIALECT == ASM_INTEL)
6977 /* This is the size of op from size of operand. */
6978 switch (GET_MODE_SIZE (GET_MODE (x)))
6981 #ifdef HAVE_GAS_FILDS_FISTS
6987 if (GET_MODE (x) == SFmode)
7002 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7004 #ifdef GAS_MNEMONICS
/* 's': shift-double count plus separator, when the assembler wants
   the count spelled out.  */
7030 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7032 PRINT_OPERAND (file, x, 0);
7038 /* Little bit of braindamage here. The SSE compare instructions
7039 does use completely different names for the comparisons that the
7040 fp conditional moves. */
7041 switch (GET_CODE (x))
7056 fputs ("unord", file);
7060 fputs ("neq", file);
7064 fputs ("nlt", file);
7068 fputs ("nle", file);
7071 fputs ("ord", file);
7079 #ifdef CMOV_SUN_AS_SYNTAX
7080 if (ASSEMBLER_DIALECT == ASM_ATT)
7082 switch (GET_MODE (x))
7084 case HImode: putc ('w', file); break;
7086 case SFmode: putc ('l', file); break;
7088 case DFmode: putc ('q', file); break;
/* 'C'/'F' and their reversed variants delegate to
   put_condition_code with the reverse/fp flags.  */
7096 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7099 #ifdef CMOV_SUN_AS_SYNTAX
7100 if (ASSEMBLER_DIALECT == ASM_ATT)
7103 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7106 /* Like above, but reverse condition */
7108 /* Check to see if argument to %c is really a constant
7109 and not a condition code which needs to be reversed. */
7110 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
7112 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7115 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7118 #ifdef CMOV_SUN_AS_SYNTAX
7119 if (ASSEMBLER_DIALECT == ASM_ATT)
7122 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
/* Branch-prediction hint prefixes, only when the profile says the
   static heuristic would guess wrong.  */
7128 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7131 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7134 int pred_val = INTVAL (XEXP (x, 0));
7136 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7137 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7139 int taken = pred_val > REG_BR_PROB_BASE / 2;
7140 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7142 /* Emit hints only in the case default branch prediction
7143 heuristics would fail. */
7144 if (taken != cputaken)
7146 /* We use 3e (DS) prefix for taken branches and
7147 2e (CS) prefix for not taken branches. */
7149 fputs ("ds ; ", file);
7151 fputs ("cs ; ", file);
7158 output_operand_lossage ("invalid operand code `%c'", code);
/* No recognized %-code handled above: print the operand itself by
   its rtx class.  */
7162 if (GET_CODE (x) == REG)
7164 PRINT_REG (x, code, file);
7167 else if (GET_CODE (x) == MEM)
7169 /* No `byte ptr' prefix for call instructions. */
7170 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7173 switch (GET_MODE_SIZE (GET_MODE (x)))
7175 case 1: size = "BYTE"; break;
7176 case 2: size = "WORD"; break;
7177 case 4: size = "DWORD"; break;
7178 case 8: size = "QWORD"; break;
7179 case 12: size = "XWORD"; break;
7180 case 16: size = "XMMWORD"; break;
7185 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7188 else if (code == 'w')
7190 else if (code == 'k')
7194 fputs (" PTR ", file);
7198 if (flag_pic && CONSTANT_ADDRESS_P (x))
7199 output_pic_addr_const (file, x, code);
7200 /* Avoid (%rip) for call operands. */
7201 else if (CONSTANT_ADDRESS_P (x) && code == 'P'
7202 && GET_CODE (x) != CONST_INT)
7203 output_addr_const (file, x);
7204 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7205 output_operand_lossage ("invalid constraints for operand");
/* SFmode immediates are emitted as their raw 32-bit image.  */
7210 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7215 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7216 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7218 if (ASSEMBLER_DIALECT == ASM_ATT)
7220 fprintf (file, "0x%lx", l);
7223 /* These float cases don't actually occur as immediate operands. */
7224 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7228 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7229 fprintf (file, "%s", dstr);
7232 else if (GET_CODE (x) == CONST_DOUBLE
7233 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
7237 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7238 fprintf (file, "%s", dstr);
/* Plain constants: '$' immediate marker in AT&T, "OFFSET FLAT:" for
   symbolic constants in Intel syntax.  */
7245 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7247 if (ASSEMBLER_DIALECT == ASM_ATT)
7250 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7251 || GET_CODE (x) == LABEL_REF)
7253 if (ASSEMBLER_DIALECT == ASM_ATT)
7256 fputs ("OFFSET FLAT:", file);
7259 if (GET_CODE (x) == CONST_INT)
7260 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7262 output_pic_addr_const (file, x, code);
7264 output_addr_const (file, x);
7268 /* Print a memory operand whose address is ADDR. */
7271 print_operand_address (file, addr)
7275 struct ix86_address parts;
7276 rtx base, index, disp;
/* Thread-pointer unspec addresses print as a segment-relative zero
   ("%fs:0" / "%gs:0", segment elided between these two lines).  */
7279 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
7281 if (ASSEMBLER_DIALECT == ASM_INTEL)
7282 fputs ("DWORD PTR ", file);
7283 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7286 fputs ("fs:0", file);
7288 fputs ("gs:0", file);
/* Decompose into base + index*scale + disp; abort path elided.  */
7292 if (! ix86_decompose_address (addr, &parts))
7296 index = parts.index;
7298 scale = parts.scale;
7300 if (!base && !index)
7302 /* Displacement only requires special attention. */
7304 if (GET_CODE (disp) == CONST_INT)
7306 if (ASSEMBLER_DIALECT == ASM_INTEL)
7308 if (USER_LABEL_PREFIX[0] == 0)
7310 fputs ("ds:", file);
7312 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr))
7315 output_pic_addr_const (file, addr, 0);
7317 output_addr_const (file, addr);
7319 /* Use one byte shorter RIP relative addressing for 64bit mode. */
7321 && ((GET_CODE (addr) == SYMBOL_REF
7322 && ! tls_symbolic_operand (addr, GET_MODE (addr)))
7323 || GET_CODE (addr) == LABEL_REF
7324 || (GET_CODE (addr) == CONST
7325 && GET_CODE (XEXP (addr, 0)) == PLUS
7326 && (GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
7327 || GET_CODE (XEXP (XEXP (addr, 0), 0)) == LABEL_REF)
7328 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)))
7329 fputs ("(%rip)", file);
/* AT&T syntax: "disp(base,index,scale)".  */
7333 if (ASSEMBLER_DIALECT == ASM_ATT)
7338 output_pic_addr_const (file, disp, 0);
7339 else if (GET_CODE (disp) == LABEL_REF)
7340 output_asm_label (disp);
7342 output_addr_const (file, disp);
7347 PRINT_REG (base, 0, file);
7351 PRINT_REG (index, 0, file);
7353 fprintf (file, ",%d", scale);
/* Intel syntax: "[base+index*scale+disp]"-style, built below.  */
7359 rtx offset = NULL_RTX;
7363 /* Pull out the offset of a symbol; print any symbol itself. */
7364 if (GET_CODE (disp) == CONST
7365 && GET_CODE (XEXP (disp, 0)) == PLUS
7366 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7368 offset = XEXP (XEXP (disp, 0), 1);
7369 disp = gen_rtx_CONST (VOIDmode,
7370 XEXP (XEXP (disp, 0), 0));
7374 output_pic_addr_const (file, disp, 0);
7375 else if (GET_CODE (disp) == LABEL_REF)
7376 output_asm_label (disp);
7377 else if (GET_CODE (disp) == CONST_INT)
7380 output_addr_const (file, disp);
7386 PRINT_REG (base, 0, file);
7389 if (INTVAL (offset) >= 0)
7391 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7395 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7402 PRINT_REG (index, 0, file);
7404 fprintf (file, "*%d", scale);
/* Target hook: print the TLS-relocation UNSPEC wrappers that
   output_addr_const itself does not understand.  Returns false for
   any rtx that is not one of these unspecs (paths elided).  */
7412 output_addr_const_extra (file, x)
7418 if (GET_CODE (x) != UNSPEC)
7421 op = XVECEXP (x, 0, 0);
/* Print the wrapped operand followed by the relocation suffix that
   matches the unspec kind; mirrors output_pic_addr_const's table.  */
7422 switch (XINT (x, 1))
7424 case UNSPEC_GOTTPOFF:
7425 output_addr_const (file, op);
7426 /* FIXME: This might be @TPOFF in Sun ld. */
7427 fputs ("@GOTTPOFF", file);
7430 output_addr_const (file, op);
7431 fputs ("@TPOFF", file);
7434 output_addr_const (file, op);
7436 fputs ("@TPOFF", file);
7438 fputs ("@NTPOFF", file);
7441 output_addr_const (file, op);
7442 fputs ("@DTPOFF", file);
7444 case UNSPEC_GOTNTPOFF:
7445 output_addr_const (file, op);
7447 fputs ("@GOTTPOFF(%rip)", file);
7449 fputs ("@GOTNTPOFF", file);
7451 case UNSPEC_INDNTPOFF:
7452 output_addr_const (file, op);
7453 fputs ("@INDNTPOFF", file);
7463 /* Split one or more DImode RTL references into pairs of SImode
7464 references. The RTL can be REG, offsettable MEM, integer constant, or
7465 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7466 split and "num" is its length. lo_half and hi_half are output arrays
7467 that parallel "operands". */
7470 split_di (operands, num, lo_half, hi_half)
7473 rtx lo_half[], hi_half[];
/* Loop header elided in this listing; NUM is used as the running
   index into all three parallel arrays below.  */
7477 rtx op = operands[num];
7479 /* simplify_subreg refuse to split volatile memory addresses,
7480 but we still have to handle it. */
7481 if (GET_CODE (op) == MEM)
/* MEM: low word at offset 0, high word at offset 4.  */
7483 lo_half[num] = adjust_address (op, SImode, 0);
7484 hi_half[num] = adjust_address (op, SImode, 4);
/* Everything else: subreg extraction; VOIDmode constants are treated
   as DImode so the byte offsets are well defined.  */
7488 lo_half[num] = simplify_gen_subreg (SImode, op,
7489 GET_MODE (op) == VOIDmode
7490 ? DImode : GET_MODE (op), 0);
7491 hi_half[num] = simplify_gen_subreg (SImode, op,
7492 GET_MODE (op) == VOIDmode
7493 ? DImode : GET_MODE (op), 4);
7497 /* Split one or more TImode RTL references into pairs of SImode
7498 references. The RTL can be REG, offsettable MEM, integer constant, or
7499 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7500 split and "num" is its length. lo_half and hi_half are output arrays
7501 that parallel "operands". */
/* NOTE(review): despite the comment above saying "pairs of SImode",
   the code below splits TImode into DImode halves (offsets 0 and 8);
   the comment appears copied from split_di.  */
7504 split_ti (operands, num, lo_half, hi_half)
7507 rtx lo_half[], hi_half[];
7511 rtx op = operands[num];
7513 /* simplify_subreg refuse to split volatile memory addresses, but we
7514 still have to handle it. */
7515 if (GET_CODE (op) == MEM)
7517 lo_half[num] = adjust_address (op, DImode, 0);
7518 hi_half[num] = adjust_address (op, DImode, 8);
7522 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7523 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7528 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7529 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7530 is the expression of the binary operation. The output may either be
7531 emitted here, or returned to the caller, like all output_* functions.
7533 There is no guarantee that the operands are the same mode, as they
7534 might be within FLOAT or FLOAT_EXTEND expressions. */
7536 #ifndef SYSV386_COMPAT
7537 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7538 wants to fix the assemblers because that causes incompatibility
7539 with gcc. No-one wants to fix gcc because that causes
7540 incompatibility with assemblers... You can use the option of
7541 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7542 #define SYSV386_COMPAT 1
7546 output_387_binary_op (insn, operands)
7550 static char buf[30];
/* Any SSE register among the operands selects the SSE code paths.  */
7553 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7555 #ifdef ENABLE_CHECKING
7556 /* Even if we do not want to check the inputs, this documents input
7557 constraints. Which helps in understanding the following code. */
7558 if (STACK_REG_P (operands[0])
7559 && ((REG_P (operands[1])
7560 && REGNO (operands[0]) == REGNO (operands[1])
7561 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7562 || (REG_P (operands[2])
7563 && REGNO (operands[0]) == REGNO (operands[2])
7564 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7565 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
/* First switch: choose the base mnemonic (fadd/fiadd, fsub/fisub,
   etc.) depending on whether an operand is integer-mode.  */
7571 switch (GET_CODE (operands[3]))
7574 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7575 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7583 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7584 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7592 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7593 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7601 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7602 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
/* SSE: scalar single vs scalar double chosen by destination mode.  */
7616 if (GET_MODE (operands[0]) == SFmode)
7617 strcat (buf, "ss\t{%2, %0|%0, %2}");
7619 strcat (buf, "sd\t{%2, %0|%0, %2}");
/* Second switch: choose the operand template / popping variant.  */
7624 switch (GET_CODE (operands[3]))
/* Commutative ops: normalize so operands[2] is the "other" input.  */
7628 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7630 rtx temp = operands[2];
7631 operands[2] = operands[1];
7635 /* know operands[0] == operands[1]. */
7637 if (GET_CODE (operands[2]) == MEM)
7643 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7645 if (STACK_TOP_P (operands[0]))
7646 /* How is it that we are storing to a dead operand[2]?
7647 Well, presumably operands[1] is dead too. We can't
7648 store the result to st(0) as st(0) gets popped on this
7649 instruction. Instead store to operands[2] (which I
7650 think has to be st(1)). st(1) will be popped later.
7651 gcc <= 2.8.1 didn't have this check and generated
7652 assembly code that the Unixware assembler rejected. */
7653 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7655 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7659 if (STACK_TOP_P (operands[0]))
7660 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7662 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
/* Non-commutative ops (MINUS/DIV): direction matters, and the
   SYSV386_COMPAT templates swap AT&T/Intel spellings of fsub{r}p.  */
7667 if (GET_CODE (operands[1]) == MEM)
7673 if (GET_CODE (operands[2]) == MEM)
7679 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7682 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7683 derived assemblers, confusingly reverse the direction of
7684 the operation for fsub{r} and fdiv{r} when the
7685 destination register is not st(0). The Intel assembler
7686 doesn't have this brain damage. Read !SYSV386_COMPAT to
7687 figure out what the hardware really does. */
7688 if (STACK_TOP_P (operands[0]))
7689 p = "{p\t%0, %2|rp\t%2, %0}";
7691 p = "{rp\t%2, %0|p\t%0, %2}";
7693 if (STACK_TOP_P (operands[0]))
7694 /* As above for fmul/fadd, we can't store to st(0). */
7695 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7697 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7702 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7705 if (STACK_TOP_P (operands[0]))
7706 p = "{rp\t%0, %1|p\t%1, %0}";
7708 p = "{p\t%1, %0|rp\t%0, %1}";
7710 if (STACK_TOP_P (operands[0]))
7711 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7713 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7718 if (STACK_TOP_P (operands[0]))
7720 if (STACK_TOP_P (operands[1]))
7721 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7723 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7726 else if (STACK_TOP_P (operands[1]))
7729 p = "{\t%1, %0|r\t%0, %1}";
7731 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7737 p = "{r\t%2, %0|\t%0, %2}";
7739 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7752 /* Output code to initialize control word copies used by
7753 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
7754 is set to control word rounding downwards. */
7756 emit_i387_cw_initialization (normal, round_down)
7757 rtx normal, round_down;
7759 rtx reg = gen_reg_rtx (HImode);
/* fnstcw stores the current control word into NORMAL; the copy in REG
   then gets rounding-control bits (0xc00, "round toward zero") set,
   either via insv on SImode or a plain HImode OR.  */
7761 emit_insn (gen_x86_fnstcw_1 (normal));
7762 emit_move_insn (reg, normal);
7763 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7765 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7767 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7768 emit_move_insn (round_down, reg);
7771 /* Output code for INSN to convert a float to a signed int. OPERANDS
7772 are the insn operands. The output may be [HSD]Imode and the input
7773 operand may be [SDX]Fmode. */
7776 output_fix_trunc (insn, operands)
7780 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7781 int dimode_p = GET_MODE (operands[0]) == DImode;
7783 /* Jump through a hoop or two for DImode, since the hardware has no
7784 non-popping instruction. We used to do this a different way, but
7785 that was somewhat fragile and broke with post-reload splitters. */
/* Duplicate st(0) so the mandatory pop of fistp leaves the value
   available when it is not actually dead.  */
7786 if (dimode_p && !stack_top_dies)
7787 output_asm_insn ("fld\t%y1", operands);
7789 if (!STACK_TOP_P (operands[1]))
7792 if (GET_CODE (operands[0]) != MEM)
/* Switch to the truncating control word (%3), store, restore (%2);
   the control words come from emit_i387_cw_initialization.  */
7795 output_asm_insn ("fldcw\t%3", operands);
7796 if (stack_top_dies || dimode_p)
7797 output_asm_insn ("fistp%z0\t%0", operands);
7799 output_asm_insn ("fist%z0\t%0", operands);
7800 output_asm_insn ("fldcw\t%2", operands);
7805 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7806 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7807 when fucom should be used. */
7810 output_fp_compare (insn, operands, eflags_p, unordered_p)
7813 int eflags_p, unordered_p;
7816 rtx cmp_op0 = operands[0];
7817 rtx cmp_op1 = operands[1];
/* Nonzero when either operand lives in an SSE register; then we must emit
   [u]comiss/[u]comisd instead of x87 compares.  */
7818 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
7823 cmp_op1 = operands[2];
/* SSE scalar compares: unordered (ucomis*) vs. ordered (comis*), chosen
   by mode (SFmode vs. DFmode).  */
7827 if (GET_MODE (operands[0]) == SFmode)
7829 return "ucomiss\t{%1, %0|%0, %1}";
7831 return "comiss\t{%1, %0|%0, %1}";
7834 return "ucomisd\t{%1, %0|%0, %1}";
7836 return "comisd\t{%1, %0|%0, %1}";
/* x87 path: operand 0 must be st(0).  */
7839 if (! STACK_TOP_P (cmp_op0))
7842 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7844 if (STACK_REG_P (cmp_op1)
7846 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7847 && REGNO (cmp_op1) != FIRST_STACK_REG)
7849 /* If both the top of the 387 stack dies, and the other operand
7850 is also a stack register that dies, then this must be a
7851 `fcompp' float compare */
7855 /* There is no double popping fcomi variant. Fortunately,
7856 eflags is immune from the fstp's cc clobbering. */
7858 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7860 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
/* fnstsw variants: compare-and-pop-twice, then fetch the status word.  */
7868 return "fucompp\n\tfnstsw\t%0";
7870 return "fcompp\n\tfnstsw\t%0";
7883 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
/* Template table indexed by the 4-bit MASK built below; only a subset of
   the 24 entries is visible in this view.  */
7885 static const char * const alt[24] =
7897 "fcomi\t{%y1, %0|%0, %y1}",
7898 "fcomip\t{%y1, %0|%0, %y1}",
7899 "fucomi\t{%y1, %0|%0, %y1}",
7900 "fucomip\t{%y1, %0|%0, %y1}",
7907 "fcom%z2\t%y2\n\tfnstsw\t%0",
7908 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7909 "fucom%z2\t%y2\n\tfnstsw\t%0",
7910 "fucomp%z2\t%y2\n\tfnstsw\t%0",
/* Integer-memory compares (ficom) for MODE_INT second operands.  */
7912 "ficom%z2\t%y2\n\tfnstsw\t%0",
7913 "ficomp%z2\t%y2\n\tfnstsw\t%0",
/* Build the table index: bit 3 = eflags_p, bit 2 = integer operand,
   bit 1 = unordered_p, bit 0 = stack_top_dies.  */
7921 mask = eflags_p << 3;
7922 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7923 mask |= unordered_p << 1;
7924 mask |= stack_top_dies;
/* Output to FILE one element of an absolute-address jump table; VALUE is
   the number of the local label (LPREFIX) being referenced.  */
7937 ix86_output_addr_vec_elt (file, value)
7941 const char *directive = ASM_LONG;
/* Use an 8-byte entry instead -- presumably guarded by a 64-bit target
   check on an elided line; TODO confirm.  */
7946 directive = ASM_QUAD;
7952 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
/* Output to FILE one element of a relative (difference) jump table:
   label VALUE expressed relative to label REL, the GOT, or the Mach-O
   function base, depending on the target's PIC flavor.  */
7956 ix86_output_addr_diff_elt (file, value, rel)
/* Plain label difference (non-PIC / 64-bit case presumably on the elided
   condition line -- TODO confirm).  */
7961 fprintf (file, "%s%s%d-%s%d\n",
7962 ASM_LONG, LPREFIX, value, LPREFIX, rel);
/* ELF PIC: express the entry as an offset from the GOT.  */
7963 else if (HAVE_AS_GOTOFF_IN_DATA)
7964 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
/* Darwin PIC: offset from the machopic function base symbol.  */
7966 else if (TARGET_MACHO)
7967 fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
7968 machopic_function_base_name () + 1);
/* Fallback: emit _GLOBAL_OFFSET_TABLE_-relative arithmetic by hand.  */
7971 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
7972 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
7975 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7979 ix86_expand_clear (dest)
7984 /* We play register width games, which are only valid after reload. */
7985 if (!reload_completed)
7988 /* Avoid HImode and its attendant prefix byte. */
/* Widen a narrow destination to SImode; the xor/mov clears the full
   register anyway and avoids the 66h operand-size prefix.  */
7989 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7990 dest = gen_rtx_REG (SImode, REGNO (dest));
7992 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7994 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
/* The xor form clobbers the flags register (hard reg 17), so wrap the
   set in a PARALLEL with the clobber.  */
7995 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7997 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7998 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8004 /* X is an unchanging MEM. If it is a constant pool reference, return
8005 the constant pool rtx, else NULL. */
8008 maybe_get_pool_constant (x)
/* Under 32-bit PIC, pool references look like
   (plus pic_offset_table_rtx (const (unspec [symbol] UNSPEC_GOTOFF)));
   peel those wrappers off to reach the SYMBOL_REF.  Each failed check
   presumably returns NULL on an elided line -- TODO confirm.  */
8013 if (flag_pic && ! TARGET_64BIT)
8015 if (GET_CODE (x) != PLUS)
8017 if (XEXP (x, 0) != pic_offset_table_rtx)
8020 if (GET_CODE (x) != CONST)
8023 if (GET_CODE (x) != UNSPEC)
8025 if (XINT (x, 1) != UNSPEC_GOTOFF)
8027 x = XVECEXP (x, 0, 0);
/* A direct (or unwrapped) constant-pool symbol: hand back its constant.  */
8030 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8031 return get_pool_constant (x);
/* Expand a scalar move of mode MODE between operands[0] and operands[1],
   massaging TLS, PIC and constant operands into forms the insn patterns
   accept, then emit the SET.  */
8037 ix86_expand_move (mode, operands)
8038 enum machine_mode mode;
/* During/after reload we may not create new pseudos.  */
8041 int strict = (reload_in_progress || reload_completed);
8042 rtx insn, op0, op1, tmp;
/* Thread-local symbols must go through the TLS legitimizer first.  */
8047 if (tls_symbolic_operand (op1, Pmode))
8049 op1 = legitimize_address (op1, op1, VOIDmode);
/* Mem-to-mem TLS move: stage the value through a register.  */
8050 if (GET_CODE (op0) == MEM)
8052 tmp = gen_reg_rtx (mode);
8053 emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
/* PIC references to symbols need legitimizing before the move.  */
8057 else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
/* Darwin (Mach-O) PIC handling: indirect the data reference and
   legitimize through a temp register when one is available.  */
8062 rtx temp = ((reload_in_progress
8063 || ((op0 && GET_CODE (op0) == REG)
8065 ? op0 : gen_reg_rtx (Pmode));
8066 op1 = machopic_indirect_data_reference (op1, temp);
8067 op1 = machopic_legitimize_pic_address (op1, mode,
8068 temp == op1 ? 0 : temp);
8072 if (MACHOPIC_INDIRECT)
8073 op1 = machopic_indirect_data_reference (op1, 0);
8077 insn = gen_rtx_SET (VOIDmode, op0, op1);
8081 #endif /* TARGET_MACHO */
/* ELF PIC: a symbolic source cannot be stored straight to memory.  */
8082 if (GET_CODE (op0) == MEM)
8083 op1 = force_reg (Pmode, op1);
8087 if (GET_CODE (temp) != REG)
8088 temp = gen_reg_rtx (Pmode);
8089 temp = legitimize_pic_address (op1, temp);
/* Non-push mem-to-mem moves are not valid; load the source first.  */
8097 if (GET_CODE (op0) == MEM
8098 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8099 || !push_operand (op0, mode))
8100 && GET_CODE (op1) == MEM)
8101 op1 = force_reg (mode, op1);
/* Pushes can only take operands that survive elimination.  */
8103 if (push_operand (op0, mode)
8104 && ! general_no_elim_operand (op1, mode))
8105 op1 = copy_to_mode_reg (mode, op1);
8107 /* Force large constants in 64bit compilation into register
8108 to get them CSEed. */
8109 if (TARGET_64BIT && mode == DImode
8110 && immediate_operand (op1, mode)
8111 && !x86_64_zero_extended_value (op1)
8112 && !register_operand (op0, mode)
8113 && optimize && !reload_completed && !reload_in_progress)
8114 op1 = copy_to_mode_reg (mode, op1);
8116 if (FLOAT_MODE_P (mode))
8118 /* If we are loading a floating point constant to a register,
8119 force the value to memory now, since we'll get better code
8120 out the back end. */
8124 else if (GET_CODE (op1) == CONST_DOUBLE
8125 && register_operand (op0, mode))
8126 op1 = validize_mem (force_const_mem (mode, op1));
8130 insn = gen_rtx_SET (VOIDmode, op0, op1);
/* Expand a vector-mode move of MODE between operands[0] and operands[1].
   Vector constants and mem-to-mem moves need staging, then a plain SET
   is emitted.  */
8136 ix86_expand_vector_move (mode, operands)
8137 enum machine_mode mode;
8140 /* Force constants other than zero into memory. We do not know how
8141 the instructions used to build constants modify the upper 64 bits
8142 of the register, once we have that information we may be able
8143 to handle some of them more efficiently. */
8144 if ((reload_in_progress | reload_completed) == 0
8145 && register_operand (operands[0], mode)
8146 && CONSTANT_P (operands[1]))
8147 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
8149 /* Make operand1 a register if it isn't already. */
/* Mem-to-mem vector moves are invalid; go through a register and return
   (the guarding condition's first line is elided in this view).  */
8151 && !register_operand (operands[0], mode)
8152 && !register_operand (operands[1], mode))
8154 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8155 emit_move_insn (operands[0], temp);
8159 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8162 /* Attempt to expand a binary operator. Make the expansion closer to the
8163 actual machine, then just general_operand, which will allow 3 separate
8164 memory references (one output, two input) in a single insn. */
8167 ix86_expand_binary_operator (code, mode, operands)
8169 enum machine_mode mode;
/* matching_memory: 1 if dst == src1 in memory, 2 if dst == src2 (only
   possible for commutative CODE).  */
8172 int matching_memory;
8173 rtx src1, src2, dst, op, clob;
8179 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
/* Canonicalize so the matching operand (or register) ends up as src1;
   the swap itself is on elided lines.  */
8180 if (GET_RTX_CLASS (code) == 'c'
8181 && (rtx_equal_p (dst, src2)
8182 || immediate_operand (src1, mode)))
8189 /* If the destination is memory, and we do not have matching source
8190 operands, do things in registers. */
8191 matching_memory = 0;
8192 if (GET_CODE (dst) == MEM)
8194 if (rtx_equal_p (dst, src1))
8195 matching_memory = 1;
8196 else if (GET_RTX_CLASS (code) == 'c'
8197 && rtx_equal_p (dst, src2))
8198 matching_memory = 2;
8200 dst = gen_reg_rtx (mode);
8203 /* Both source operands cannot be in memory. */
8204 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
/* Prefer to keep the source that matches the memory destination.  */
8206 if (matching_memory != 2)
8207 src2 = force_reg (mode, src2);
8209 src1 = force_reg (mode, src1);
8212 /* If the operation is not commutable, source 1 cannot be a constant
8213 or non-matching memory. */
8214 if ((CONSTANT_P (src1)
8215 || (!matching_memory && GET_CODE (src1) == MEM))
8216 && GET_RTX_CLASS (code) != 'c')
8217 src1 = force_reg (mode, src1);
8219 /* If optimizing, copy to regs to improve CSE */
8220 if (optimize && ! no_new_pseudos)
8222 if (GET_CODE (dst) == MEM)
8223 dst = gen_reg_rtx (mode);
8224 if (GET_CODE (src1) == MEM)
8225 src1 = force_reg (mode, src1);
8226 if (GET_CODE (src2) == MEM)
8227 src2 = force_reg (mode, src2);
8230 /* Emit the instruction. */
8232 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8233 if (reload_in_progress)
8235 /* Reload doesn't know about the flags register, and doesn't know that
8236 it doesn't want to clobber it. We can only do this with PLUS. */
/* Normal case: arithmetic on x86 clobbers EFLAGS, so pair the SET with
   an explicit clobber of the flags register.  */
8243 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8244 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8247 /* Fix up the destination if needed. */
8248 if (dst != operands[0])
8249 emit_move_insn (operands[0], dst);
8252 /* Return TRUE or FALSE depending on whether the binary operator meets the
8253 appropriate constraints. */
8256 ix86_binary_operator_ok (code, mode, operands)
8258 enum machine_mode mode ATTRIBUTE_UNUSED;
/* Each failed constraint presumably returns FALSE on an elided line;
   mirror of the fixups done by ix86_expand_binary_operator.  */
8261 /* Both source operands cannot be in memory. */
8262 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8264 /* If the operation is not commutable, source 1 cannot be a constant. */
8265 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
8267 /* If the destination is memory, we must have a matching source operand. */
8268 if (GET_CODE (operands[0]) == MEM
8269 && ! (rtx_equal_p (operands[0], operands[1])
8270 || (GET_RTX_CLASS (code) == 'c'
8271 && rtx_equal_p (operands[0], operands[2]))))
8273 /* If the operation is not commutable and the source 1 is memory, we must
8274 have a matching destination. */
8275 if (GET_CODE (operands[1]) == MEM
8276 && GET_RTX_CLASS (code) != 'c'
8277 && ! rtx_equal_p (operands[0], operands[1]))
8282 /* Attempt to expand a unary operator. Make the expansion closer to the
8283 actual machine, then just general_operand, which will allow 2 separate
8284 memory references (one output, one input) in a single insn. */
8287 ix86_expand_unary_operator (code, mode, operands)
8289 enum machine_mode mode;
8292 int matching_memory;
8293 rtx src, dst, op, clob;
8298 /* If the destination is memory, and we do not have matching source
8299 operands, do things in registers. */
8300 matching_memory = 0;
8301 if (GET_CODE (dst) == MEM)
8303 if (rtx_equal_p (dst, src))
8304 matching_memory = 1;
8306 dst = gen_reg_rtx (mode);
8309 /* When source operand is memory, destination must match. */
8310 if (!matching_memory && GET_CODE (src) == MEM)
8311 src = force_reg (mode, src);
8313 /* If optimizing, copy to regs to improve CSE */
8314 if (optimize && ! no_new_pseudos)
8316 if (GET_CODE (dst) == MEM)
8317 dst = gen_reg_rtx (mode);
8318 if (GET_CODE (src) == MEM)
8319 src = force_reg (mode, src);
8322 /* Emit the instruction. */
8324 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
/* NOT is the one unary op that does not clobber EFLAGS, so it (like the
   reload case) is emitted without the flags clobber.  */
8325 if (reload_in_progress || code == NOT)
8327 /* Reload doesn't know about the flags register, and doesn't know that
8328 it doesn't want to clobber it. */
8335 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8336 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8339 /* Fix up the destination if needed. */
8340 if (dst != operands[0])
8341 emit_move_insn (operands[0], dst);
8344 /* Return TRUE or FALSE depending on whether the unary operator meets the
8345 appropriate constraints. */
8348 ix86_unary_operator_ok (code, mode, operands)
8349 enum rtx_code code ATTRIBUTE_UNUSED;
8350 enum machine_mode mode ATTRIBUTE_UNUSED;
8351 rtx operands[2] ATTRIBUTE_UNUSED;
8353 /* If one of operands is memory, source and destination must match. */
8354 if ((GET_CODE (operands[0]) == MEM
8355 || GET_CODE (operands[1]) == MEM)
8356 && ! rtx_equal_p (operands[0], operands[1]))
8361 /* Return TRUE or FALSE depending on whether the first SET in INSN
8362 has source and destination with matching CC modes, and that the
8363 CC mode is at least as constrained as REQ_MODE. */
8366 ix86_match_ccmode (insn, req_mode)
8368 enum machine_mode req_mode;
8371 enum machine_mode set_mode;
/* The SET of interest may be the first element of a PARALLEL.  */
8373 set = PATTERN (insn);
8374 if (GET_CODE (set) == PARALLEL)
8375 set = XVECEXP (set, 0, 0);
8376 if (GET_CODE (set) != SET)
8378 if (GET_CODE (SET_SRC (set)) != COMPARE)
8381 set_mode = GET_MODE (SET_DEST (set));
/* The switch on set_mode is partly elided; each case checks whether the
   insn's CC mode satisfies REQ_MODE's constraints.  */
8385 if (req_mode != CCNOmode
8386 && (req_mode != CCmode
8387 || XEXP (SET_SRC (set), 1) != const0_rtx))
8391 if (req_mode == CCGCmode)
8395 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8399 if (req_mode == CCZmode)
/* Finally the destination's mode must equal the COMPARE's mode.  */
8409 return (GET_MODE (SET_SRC (set)) == set_mode);
8412 /* Generate insn patterns to do an integer compare of OPERANDS. */
8415 ix86_expand_int_compare (code, op0, op1)
8419 enum machine_mode cmpmode;
/* Pick the least constrained CC mode that still captures CODE.  */
8422 cmpmode = SELECT_CC_MODE (code, op0, op1);
8423 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8425 /* This is very simple, but making the interface the same as in the
8426 FP case makes the rest of the code easier. */
8427 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8428 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8430 /* Return the test that should be put into the flags user, i.e.
8431 the bcc, scc, or cmov instruction. */
8432 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8435 /* Figure out whether to use ordered or unordered fp comparisons.
8436 Return the appropriate mode to use. */
8439 ix86_fp_compare_mode (code)
8440 enum rtx_code code ATTRIBUTE_UNUSED;
8442 /* ??? In order to make all comparisons reversible, we do all comparisons
8443 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8444 all forms trapping and nontrapping comparisons, we can make inequality
8445 comparisons trapping again, since it results in better code when using
8446 FCOM based compares. */
8447 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* Return the CC mode that a comparison CODE of OP0 against OP1 requires.
   The actual mode returns for each case group sit on elided lines.  */
8451 ix86_cc_mode (code, op0, op1)
/* Floating-point comparisons have their own mode selection.  */
8455 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8456 return ix86_fp_compare_mode (code);
8459 /* Only zero flag is needed. */
8461 case NE: /* ZF!=0 */
8463 /* Codes needing carry flag. */
8464 case GEU: /* CF=0 */
8465 case GTU: /* CF=0 & ZF=0 */
8466 case LTU: /* CF=1 */
8467 case LEU: /* CF=1 | ZF=1 */
8469 /* Codes possibly doable only with sign flag when
8470 comparing against zero. */
8471 case GE: /* SF=OF or SF=0 */
8472 case LT: /* SF<>OF or SF=1 */
8473 if (op1 == const0_rtx)
8476 /* For other cases Carry flag is not required. */
8478 /* Codes doable only with sign flag when comparing
8479 against zero, but we miss jump instruction for it
8480 so we need to use relational tests against overflow
8481 that thus needs to be zero. */
8482 case GT: /* ZF=0 & SF=OF */
8483 case LE: /* ZF=1 | SF<>OF */
8484 if (op1 == const0_rtx)
8488 /* strcmp pattern do (use flags) and combine may ask us for proper
8497 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8500 ix86_use_fcomi_compare (code)
8501 enum rtx_code code ATTRIBUTE_UNUSED;
8503 enum rtx_code swapped_code = swap_condition (code);
/* fcomi wins if it matches the best cost for either operand order.  */
8504 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8505 || (ix86_fp_comparison_cost (swapped_code)
8506 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8509 /* Swap, force into registers, or otherwise massage the two operands
8510 to a fp comparison. The operands are updated in place; the new
8511 comparison code is returned. */
8513 static enum rtx_code
8514 ix86_prepare_fp_compare_args (code, pop0, pop1)
8518 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8519 rtx op0 = *pop0, op1 = *pop1;
8520 enum machine_mode op_mode = GET_MODE (op0);
8521 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8523 /* All of the unordered compare instructions only work on registers.
8524 The same is true of the XFmode compare instructions. The same is
8525 true of the fcomi compare instructions. */
/* (An !is_sse guard presumably completes this condition on the elided
   line 8527 -- TODO confirm.)  */
8528 && (fpcmp_mode == CCFPUmode
8529 || op_mode == XFmode
8530 || op_mode == TFmode
8531 || ix86_use_fcomi_compare (code)))
8533 op0 = force_reg (op_mode, op0);
8534 op1 = force_reg (op_mode, op1);
8538 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8539 things around if they appear profitable, otherwise force op0
8542 if (standard_80387_constant_p (op0) == 0
8543 || (GET_CODE (op0) == MEM
8544 && ! (standard_80387_constant_p (op1) == 0
8545 || GET_CODE (op1) == MEM)))
8548 tmp = op0, op0 = op1, op1 = tmp;
8549 code = swap_condition (code);
8552 if (GET_CODE (op0) != REG)
8553 op0 = force_reg (op_mode, op0);
/* Constants that are not cheap x87 loads go to the constant pool.  */
8555 if (CONSTANT_P (op1))
8557 if (standard_80387_constant_p (op1))
8558 op1 = force_reg (op_mode, op1);
8560 op1 = validize_mem (force_const_mem (op_mode, op1));
8564 /* Try to rearrange the comparison to make it cheaper. */
8565 if (ix86_fp_comparison_cost (code)
8566 > ix86_fp_comparison_cost (swap_condition (code))
8567 && (GET_CODE (op1) == REG || !no_new_pseudos))
8570 tmp = op0, op0 = op1, op1 = tmp;
8571 code = swap_condition (code);
8572 if (GET_CODE (op0) != REG)
8573 op0 = force_reg (op_mode, op0);
8581 /* Convert comparison codes we use to represent FP comparison to integer
8582 code that will result in proper branch. Return UNKNOWN if no such code
/* NOTE(review): the body of this function (a code-mapping switch,
   original lines 8586-8614) is entirely elided in this view.  */
8584 static enum rtx_code
8585 ix86_fp_compare_code_to_integer (code)
8615 /* Split comparison code CODE into comparisons we can do using branch
8616 instructions. BYPASS_CODE is comparison code for branch that will
8617 branch around FIRST_CODE and SECOND_CODE. If some of branches
8618 is not required, set value to NIL.
8619 We never require more than two branches. */
8621 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
8622 enum rtx_code code, *bypass_code, *first_code, *second_code;
8628 /* The fcomi comparison sets flags as follows:
/* Codes representable with a single branch after fcomi (the mapped
   first_code assignments are on elided lines).  */
8638 case GT: /* GTU - CF=0 & ZF=0 */
8639 case GE: /* GEU - CF=0 */
8640 case ORDERED: /* PF=0 */
8641 case UNORDERED: /* PF=1 */
8642 case UNEQ: /* EQ - ZF=1 */
8643 case UNLT: /* LTU - CF=1 */
8644 case UNLE: /* LEU - CF=1 | ZF=1 */
8645 case LTGT: /* EQ - ZF=0 */
/* Codes whose single-branch form mis-handles unordered inputs: add a
   bypass branch around the test, or a second branch after it.  */
8647 case LT: /* LTU - CF=1 - fails on unordered */
8649 *bypass_code = UNORDERED;
8651 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8653 *bypass_code = UNORDERED;
8655 case EQ: /* EQ - ZF=1 - fails on unordered */
8657 *bypass_code = UNORDERED;
8659 case NE: /* NE - ZF=0 - fails on unordered */
8661 *second_code = UNORDERED;
8663 case UNGE: /* GEU - CF=0 - fails on unordered */
8665 *second_code = UNORDERED;
8667 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8669 *second_code = UNORDERED;
/* Without IEEE conformance the unordered fixups can be dropped.  */
8674 if (!TARGET_IEEE_FP)
8681 /* Return cost of comparison done fcom + arithmetics operations on AX.
8682 All following functions do use number of instructions as a cost metrics.
8683 In future this should be tweaked to compute bytes for optimize_size and
8684 take into account performance of various instructions on various CPUs. */
8686 ix86_fp_comparison_arithmetics_cost (code)
/* Non-IEEE targets presumably get a fixed small cost here; the per-code
   switch (original lines 8692-8718) is elided in this view.  */
8689 if (!TARGET_IEEE_FP)
8691 /* The cost of code output by ix86_expand_fp_compare. */
8719 /* Return cost of comparison done using fcomi operation.
8720 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8722 ix86_fp_comparison_fcomi_cost (code)
8725 enum rtx_code bypass_code, first_code, second_code;
8726 /* Return arbitrarily high cost when instruction is not supported - this
8727 prevents gcc from using it. */
8730 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
/* Base cost 2 (fcomi + branch) plus 1 for any extra bypass/second branch.  */
8731 return (bypass_code != NIL || second_code != NIL) + 2;
8734 /* Return cost of comparison done using sahf operation.
8735 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8737 ix86_fp_comparison_sahf_cost (code)
8740 enum rtx_code bypass_code, first_code, second_code;
8741 /* Return arbitrarily high cost when instruction is not preferred - this
8742 avoids gcc from using it. */
8743 if (!TARGET_USE_SAHF && !optimize_size)
8745 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
/* Base cost 3 (fcom + fnstsw/sahf + branch) plus 1 for extra branches.  */
8746 return (bypass_code != NIL || second_code != NIL) + 3;
8749 /* Compute cost of the comparison done using any method.
8750 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8752 ix86_fp_comparison_cost (code)
8755 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8758 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8759 sahf_cost = ix86_fp_comparison_sahf_cost (code);
/* Take the minimum of the three strategies (min's return is on an
   elided line).  */
8761 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8762 if (min > sahf_cost)
8764 if (min > fcomi_cost)
8769 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8772 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
8774 rtx op0, op1, scratch;
8778 enum machine_mode fpcmp_mode, intcmp_mode;
8780 int cost = ix86_fp_comparison_cost (code);
8781 enum rtx_code bypass_code, first_code, second_code;
8783 fpcmp_mode = ix86_fp_compare_mode (code);
8784 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
/* Callers that cannot handle extra tests pass NULL pointers.  */
8787 *second_test = NULL_RTX;
8789 *bypass_test = NULL_RTX;
8791 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8793 /* Do fcomi/sahf based test when profitable. */
8794 if ((bypass_code == NIL || bypass_test)
8795 && (second_code == NIL || second_test)
8796 && ix86_fp_comparison_arithmetics_cost (code) > cost)
/* fcomi branch: compare straight into the flags register.  */
8800 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8801 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
/* sahf branch: fnstsw into a scratch, then sahf moves AH to flags.  */
8807 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8808 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8810 scratch = gen_reg_rtx (HImode);
8811 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8812 emit_insn (gen_x86_sahf_1 (scratch));
8815 /* The FP codes work out to act like unsigned. */
8816 intcmp_mode = fpcmp_mode;
8818 if (bypass_code != NIL)
8819 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8820 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8822 if (second_code != NIL)
8823 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8824 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8829 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8830 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8831 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8833 scratch = gen_reg_rtx (HImode);
8834 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8836 /* In the unordered case, we have to check C2 for NaN's, which
8837 doesn't happen to work out to anything nice combination-wise.
8838 So do some bit twiddling on the value we've got in AH to come
8839 up with an appropriate set of condition codes. */
8841 intcmp_mode = CCNOmode;
/* The per-code arms below test combinations of the x87 status bits
   C0 (0x01), C2 (0x04) and C3 (0x40) -- 0x45 covers C0|C2|C3; the
   emitted comparison code for each arm is on an elided line.  */
8846 if (code == GT || !TARGET_IEEE_FP)
8848 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8853 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8854 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8855 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8856 intcmp_mode = CCmode;
8862 if (code == LT && TARGET_IEEE_FP)
8864 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8865 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8866 intcmp_mode = CCmode;
8871 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8877 if (code == GE || !TARGET_IEEE_FP)
8879 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8884 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8885 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8892 if (code == LE && TARGET_IEEE_FP)
8894 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8895 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8896 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8897 intcmp_mode = CCmode;
8902 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8908 if (code == EQ && TARGET_IEEE_FP)
8910 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8911 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8912 intcmp_mode = CCmode;
8917 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8924 if (code == NE && TARGET_IEEE_FP)
8926 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8927 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8933 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
/* ORDERED / UNORDERED reduce to testing C2 (0x04).  */
8939 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8943 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8952 /* Return the test that should be put into the flags user, i.e.
8953 the bcc, scc, or cmov instruction. */
8954 return gen_rtx_fmt_ee (code, VOIDmode,
8955 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Expand the pending comparison (ix86_compare_op0/op1) with code CODE and
   return the flags-user test rtx.  SECOND_TEST / BYPASS_TEST receive the
   extra FP tests when needed (may be NULL if the caller cannot cope).  */
8960 ix86_expand_compare (code, second_test, bypass_test)
8962 rtx *second_test, *bypass_test;
8965 op0 = ix86_compare_op0;
8966 op1 = ix86_compare_op1;
8969 *second_test = NULL_RTX;
8971 *bypass_test = NULL_RTX;
/* Dispatch to the FP or integer expander based on op0's mode class.  */
8973 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8974 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
8975 second_test, bypass_test);
8977 ret = ix86_expand_int_compare (code, op0, op1);
8982 /* Return true if the CODE will result in nontrivial jump sequence. */
8984 ix86_fp_jump_nontrivial_p (code)
8987 enum rtx_code bypass_code, first_code, second_code;
/* A jump is nontrivial when it needs a bypass or second branch.  */
8990 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8991 return bypass_code != NIL || second_code != NIL;
/* Expand a conditional branch on CODE applied to the pending comparison
   operands, jumping to LABEL when true.  Dispatches on the mode of
   ix86_compare_op0: integer modes, FP modes, and DImode-on-32-bit each
   get their own strategy.  */
8995 ix86_expand_branch (code, label)
9001 switch (GET_MODE (ix86_compare_op0))
/* Simple integer case: one compare, one conditional jump.  */
9007 tmp = ix86_expand_compare (code, NULL, NULL);
9008 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9009 gen_rtx_LABEL_REF (VOIDmode, label),
9011 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
/* Floating-point case.  */
9021 enum rtx_code bypass_code, first_code, second_code;
9023 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9026 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9028 /* Check whether we will use the natural sequence with one jump. If
9029 so, we can expand jump early. Otherwise delay expansion by
9030 creating compound insn to not confuse optimizers. */
9031 if (bypass_code == NIL && second_code == NIL
9034 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9035 gen_rtx_LABEL_REF (VOIDmode, label),
/* Multi-jump case: emit one compound jump insn (with clobbers) that a
   later splitter expands, keeping the optimizers' view simple.  */
9040 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9041 ix86_compare_op0, ix86_compare_op1);
9042 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9043 gen_rtx_LABEL_REF (VOIDmode, label),
9045 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9047 use_fcomi = ix86_use_fcomi_compare (code);
9048 vec = rtvec_alloc (3 + !use_fcomi);
9049 RTVEC_ELT (vec, 0) = tmp;
/* Clobber both FP status (18) and flags (17) hard registers; the sahf
   path additionally needs a HImode scratch.  */
9051 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9053 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9056 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9058 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9066 /* Expand DImode branch into multiple compare+branch. */
9068 rtx lo[2], hi[2], label2;
9069 enum rtx_code code1, code2, code3;
/* Canonicalize constants into op1.  */
9071 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9073 tmp = ix86_compare_op0;
9074 ix86_compare_op0 = ix86_compare_op1;
9075 ix86_compare_op1 = tmp;
9076 code = swap_condition (code);
9078 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9079 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9081 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9082 avoid two branches. This costs one extra insn, so disable when
9083 optimizing for size. */
9085 if ((code == EQ || code == NE)
9087 || hi[1] == const0_rtx || lo[1] == const0_rtx))
/* XOR each half against the corresponding constant/register, OR the
   results, then branch on the combined value being (non)zero.  */
9092 if (hi[1] != const0_rtx)
9093 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9094 NULL_RTX, 0, OPTAB_WIDEN);
9097 if (lo[1] != const0_rtx)
9098 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9099 NULL_RTX, 0, OPTAB_WIDEN);
9101 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9102 NULL_RTX, 0, OPTAB_WIDEN);
9104 ix86_compare_op0 = tmp;
9105 ix86_compare_op1 = const0_rtx;
9106 ix86_expand_branch (code, label);
9110 /* Otherwise, if we are doing less-than or greater-or-equal-than,
9111 op1 is a constant and the low word is zero, then we can just
9112 examine the high word. */
9114 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9117 case LT: case LTU: case GE: case GEU:
9118 ix86_compare_op0 = hi[0];
9119 ix86_compare_op1 = hi[1];
9120 ix86_expand_branch (code, label);
9126 /* Otherwise, we need two or three jumps. */
9128 label2 = gen_label_rtx ();
9131 code2 = swap_condition (code);
9132 code3 = unsigned_condition (code);
/* Pick the per-word comparison codes; strict inequalities keep CODE on
   the high word, non-strict ones are decomposed below.  */
9136 case LT: case GT: case LTU: case GTU:
9139 case LE: code1 = LT; code2 = GT; break;
9140 case GE: code1 = GT; code2 = LT; break;
9141 case LEU: code1 = LTU; code2 = GTU; break;
9142 case GEU: code1 = GTU; code2 = LTU; break;
9144 case EQ: code1 = NIL; code2 = NE; break;
9145 case NE: code2 = NIL; break;
9153 * if (hi(a) < hi(b)) goto true;
9154 * if (hi(a) > hi(b)) goto false;
9155 * if (lo(a) < lo(b)) goto true;
9159 ix86_compare_op0 = hi[0];
9160 ix86_compare_op1 = hi[1];
9163 ix86_expand_branch (code1, label);
9165 ix86_expand_branch (code2, label2);
/* Low word is compared unsigned regardless of CODE's signedness.  */
9167 ix86_compare_op0 = lo[0];
9168 ix86_compare_op1 = lo[1];
9169 ix86_expand_branch (code3, label);
9172 emit_label (label2);
9181 /* Split branch based on floating point condition. */
9183 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
9185 rtx op1, op2, target1, target2, tmp;
9188 rtx label = NULL_RTX;
/* Branch probabilities to attach via REG_BR_PROB; -1 means "none".  */
9190 int bypass_probability = -1, second_probability = -1, probability = -1;
/* Canonicalize so TARGET2 is the fallthrough (pc_rtx), reversing the
   condition (unordered-safely) when needed.  */
9193 if (target2 != pc_rtx)
9196 code = reverse_condition_maybe_unordered (code);
9201 condition = ix86_expand_fp_compare (code, op1, op2,
9202 tmp, &second, &bypass);
9204 if (split_branch_probability >= 0)
9206 /* Distribute the probabilities across the jumps.
9207 Assume the BYPASS and SECOND to be always test
9209 probability = split_branch_probability;
9211 /* Value of 1 is low enough to make no need for probability
9212 to be updated. Later we may run some experiments and see
9213 if unordered values are more frequent in practice. */
9215 bypass_probability = 1;
9217 second_probability = 1;
/* Bypass jump: skip over the main test for unordered inputs.  */
9219 if (bypass != NULL_RTX)
9221 label = gen_label_rtx ();
9222 i = emit_jump_insn (gen_rtx_SET
9224 gen_rtx_IF_THEN_ELSE (VOIDmode,
9226 gen_rtx_LABEL_REF (VOIDmode,
9229 if (bypass_probability >= 0)
9231 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9232 GEN_INT (bypass_probability),
/* Main conditional jump.  */
9235 i = emit_jump_insn (gen_rtx_SET
9237 gen_rtx_IF_THEN_ELSE (VOIDmode,
9238 condition, target1, target2)));
9239 if (probability >= 0)
9241 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9242 GEN_INT (probability),
/* Second jump: extra test required by some IEEE comparisons.  */
9244 if (second != NULL_RTX)
9246 i = emit_jump_insn (gen_rtx_SET
9248 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9250 if (second_probability >= 0)
9252 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9253 GEN_INT (second_probability),
/* Land the bypass here, past all the true-branches.  */
9256 if (label != NULL_RTX)
/* Expand a setcc of CODE into DEST (a QImode register).  Returns 1 on
   success, 0 to make the caller FAIL the expansion.  */
9261 ix86_expand_setcc (code, dest)
9265 rtx ret, tmp, tmpreg;
9266 rtx second_test, bypass_test;
/* 32-bit DImode comparisons go through the multi-branch path and cannot
   be done as a single setcc.  */
9268 if (GET_MODE (ix86_compare_op0) == DImode
9270 return 0; /* FAIL */
9272 if (GET_MODE (dest) != QImode)
9275 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9276 PUT_MODE (ret, QImode);
9281 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
/* IEEE FP comparisons may need a second setcc combined with the first
   via AND (bypass) or OR (second).  */
9282 if (bypass_test || second_test)
9284 rtx test = second_test;
9286 rtx tmp2 = gen_reg_rtx (QImode);
/* The bypass test is emitted inverted so it can be ANDed in.  */
9293 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9295 PUT_MODE (test, QImode);
9296 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9299 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9301 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9304 return 1; /* DONE */
9307 /* Expand comparison setting or clearing carry flag. Return true when successful
9308 and set pop for the operation. */
9310 ix86_expand_carry_flag_compare (code, op0, op1, pop)
9314 enum machine_mode mode =
9315 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9317 /* Do not handle DImode compares that go trought special path. Also we can't
9318 deal with FP compares yet. This is possible to add. */
9319 if ((mode == DImode && !TARGET_64BIT) || !INTEGRAL_MODE_P (mode))
/* Each arm below rewrites CODE/OP1 so the final compare is LTU or GEU,
   i.e. fully decided by the carry flag.  */
9327 /* Convert a==0 into (unsigned)a<1. */
9330 if (op1 != const0_rtx)
9333 code = (code == EQ ? LTU : GEU);
9336 /* Convert a>b into b<a or a>=b-1. */
9339 if (GET_CODE (op1) == CONST_INT)
9341 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9342 /* Bail out on overflow. We still can swap operands but that
9343 would force loading of the constant into register. */
9344 if (op1 == const0_rtx
9345 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9347 code = (code == GTU ? GEU : LTU);
9354 code = (code == GTU ? LTU : GEU);
9358 /* Convert a>0 into (unsigned)a<0x7fffffff. */
9361 if (mode == DImode || op1 != const0_rtx)
/* Flip the sign bit via the all-but-sign-bit mask constant.  */
9363 op1 = gen_int_mode (~(1 << (GET_MODE_BITSIZE (mode) - 1)), mode);
9364 code = (code == LT ? GEU : LTU);
9368 if (mode == DImode || op1 != constm1_rtx)
9370 op1 = gen_int_mode (~(1 << (GET_MODE_BITSIZE (mode) - 1)), mode);
9371 code = (code == LE ? GEU : LTU);
/* Emit the massaged compare and hand the flags test back via *POP;
   only LTU/GEU results are acceptable here.  */
9377 ix86_compare_op0 = op0;
9378 ix86_compare_op1 = op1;
9379 *pop = ix86_expand_compare (code, NULL, NULL);
9380 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
/* Expand an integer conditional move (movcc): operands[0] = dest,
   operands[1] = comparison, operands[2]/operands[3] = true/false values.
   Returns 1 (DONE) when the whole expansion was emitted here, 0 (FAIL)
   otherwise.  Tries, in order: carry-flag/sbb tricks for constant arms,
   lea-based forms, setcc+arithmetic when cmov is unavailable, masking a
   variable against a loaded constant, and finally a real cmov.
   NOTE(review): this listing has numbering gaps; many interior lines
   (braces, conditions, assignments such as `diff = ct - cf`) are
   missing -- the fragment is not complete source.  */
9386 ix86_expand_int_movcc (operands)
9389 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9390 rtx compare_seq, compare_op;
9391 rtx second_test, bypass_test;
9392 enum machine_mode mode = GET_MODE (operands[0]);
9393 bool sign_bit_compare_p = false;;
9396 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9397 compare_seq = get_insns ();
9400 compare_code = GET_CODE (compare_op);
/* A compare against 0/-1 with GE/LT (resp. GT/LE) only tests the sign bit.  */
9402 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9403 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9404 sign_bit_compare_p = true;
9406 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9407 HImode insns, we'd be swallowed in word prefix ops. */
9409 if ((mode != HImode || TARGET_FAST_PREFIX)
9410 && (mode != DImode || TARGET_64BIT)
9411 && GET_CODE (operands[2]) == CONST_INT
9412 && GET_CODE (operands[3]) == CONST_INT)
9414 rtx out = operands[0];
9415 HOST_WIDE_INT ct = INTVAL (operands[2]);
9416 HOST_WIDE_INT cf = INTVAL (operands[3]);
9420 /* Sign bit compares are better done using shifts than we do by using
9422 if (sign_bit_compare_p
9423 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9424 ix86_compare_op1, &compare_op))
9426 /* Detect overlap between destination and compare sources. */
9429 if (!sign_bit_compare_p)
9431 compare_code = GET_CODE (compare_op);
9433 /* To simplify rest of code, restrict to the GEU case. */
9434 if (compare_code == LTU)
9436 HOST_WIDE_INT tmp = ct;
9439 compare_code = reverse_condition (compare_code);
9440 code = reverse_condition (code);
9444 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9445 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9446 tmp = gen_reg_rtx (mode);
/* Materialize 0/-1 from the carry flag (sbb reg,reg idiom).  */
9449 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
9451 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp)));
9455 if (code == GT || code == GE)
9456 code = reverse_condition (code);
9459 HOST_WIDE_INT tmp = ct;
9463 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9464 ix86_compare_op1, VOIDmode, 0, -1);
9477 tmp = expand_simple_binop (mode, PLUS,
9479 copy_rtx (tmp), 1, OPTAB_DIRECT);
9490 tmp = expand_simple_binop (mode, IOR,
9492 copy_rtx (tmp), 1, OPTAB_DIRECT);
9494 else if (diff == -1 && ct)
9504 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9506 tmp = expand_simple_binop (mode, PLUS,
9507 copy_rtx (tmp), GEN_INT (cf),
9508 copy_rtx (tmp), 1, OPTAB_DIRECT);
9516 * andl cf - ct, dest
9526 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9529 tmp = expand_simple_binop (mode, AND,
9531 gen_int_mode (cf - ct, mode),
9532 copy_rtx (tmp), 1, OPTAB_DIRECT);
9534 tmp = expand_simple_binop (mode, PLUS,
9535 copy_rtx (tmp), GEN_INT (ct),
9536 copy_rtx (tmp), 1, OPTAB_DIRECT);
9539 if (!rtx_equal_p (tmp, out))
9540 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9542 return 1; /* DONE */
9548 tmp = ct, ct = cf, cf = tmp;
9550 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9552 /* We may be reversing unordered compare to normal compare, that
9553 is not valid in general (we may convert non-trapping condition
9554 to trapping one), however on i386 we currently emit all
9555 comparisons unordered. */
9556 compare_code = reverse_condition_maybe_unordered (compare_code);
9557 code = reverse_condition_maybe_unordered (code);
9561 compare_code = reverse_condition (compare_code);
9562 code = reverse_condition (code);
9567 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9568 && GET_CODE (ix86_compare_op1) == CONST_INT)
9570 if (ix86_compare_op1 == const0_rtx
9571 && (code == LT || code == GE))
9572 compare_code = code;
9573 else if (ix86_compare_op1 == constm1_rtx)
9577 else if (code == GT)
9582 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9583 if (compare_code != NIL
9584 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9585 && (cf == -1 || ct == -1))
9587 /* If lea code below could be used, only optimize
9588 if it results in a 2 insn sequence. */
9590 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9591 || diff == 3 || diff == 5 || diff == 9)
9592 || (compare_code == LT && ct == -1)
9593 || (compare_code == GE && cf == -1))
9596 * notl op1 (if necessary)
9604 code = reverse_condition (code);
9607 out = emit_store_flag (out, code, ix86_compare_op0,
9608 ix86_compare_op1, VOIDmode, 0, -1);
9610 out = expand_simple_binop (mode, IOR,
9612 out, 1, OPTAB_DIRECT);
9613 if (out != operands[0])
9614 emit_move_insn (operands[0], out);
9616 return 1; /* DONE */
/* lea path: diff must be a valid scale (1,2,4,8) or scale+base (3,5,9).  */
9621 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9622 || diff == 3 || diff == 5 || diff == 9)
9623 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9624 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
9630 * lea cf(dest*(ct-cf)),dest
9634 * This also catches the degenerate setcc-only case.
9640 out = emit_store_flag (out, code, ix86_compare_op0,
9641 ix86_compare_op1, VOIDmode, 0, 1);
9644 /* On x86_64 the lea instruction operates on Pmode, so we need
9645 to get arithmetics done in proper mode to match. */
9647 tmp = copy_rtx (out);
9651 out1 = copy_rtx (out);
9652 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9656 tmp = gen_rtx_PLUS (mode, tmp, out1);
9662 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9665 if (!rtx_equal_p (tmp, out))
9668 out = force_operand (tmp, copy_rtx (out));
9670 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
9672 if (!rtx_equal_p (out, operands[0]))
9673 emit_move_insn (operands[0], copy_rtx (out));
9675 return 1; /* DONE */
9679 * General case: Jumpful:
9680 * xorl dest,dest cmpl op1, op2
9681 * cmpl op1, op2 movl ct, dest
9683 * decl dest movl cf, dest
9684 * andl (cf-ct),dest 1:
9689 * This is reasonably steep, but branch mispredict costs are
9690 * high on modern cpus, so consider failing only if optimizing
9694 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9695 && BRANCH_COST >= 2)
9701 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9702 /* We may be reversing unordered compare to normal compare,
9703 that is not valid in general (we may convert non-trapping
9704 condition to trapping one), however on i386 we currently
9705 emit all comparisons unordered. */
9706 code = reverse_condition_maybe_unordered (code);
9709 code = reverse_condition (code);
9710 if (compare_code != NIL)
9711 compare_code = reverse_condition (compare_code);
9715 if (compare_code != NIL)
9717 /* notl op1 (if needed)
9722 For x < 0 (resp. x <= -1) there will be no notl,
9723 so if possible swap the constants to get rid of the
9725 True/false will be -1/0 while code below (store flag
9726 followed by decrement) is 0/-1, so the constants need
9727 to be exchanged once more. */
9729 if (compare_code == GE || !cf)
9731 code = reverse_condition (code);
9736 HOST_WIDE_INT tmp = cf;
9741 out = emit_store_flag (out, code, ix86_compare_op0,
9742 ix86_compare_op1, VOIDmode, 0, -1);
9746 out = emit_store_flag (out, code, ix86_compare_op0,
9747 ix86_compare_op1, VOIDmode, 0, 1);
9749 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
9750 copy_rtx (out), 1, OPTAB_DIRECT);
9753 out = expand_simple_binop (mode, AND, copy_rtx (out),
9754 gen_int_mode (cf - ct, mode),
9755 copy_rtx (out), 1, OPTAB_DIRECT);
9757 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
9758 copy_rtx (out), 1, OPTAB_DIRECT);
9759 if (!rtx_equal_p (out, operands[0]))
9760 emit_move_insn (operands[0], copy_rtx (out));
9762 return 1; /* DONE */
9766 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9768 /* Try a few things more with specific constants and a variable. */
9771 rtx var, orig_out, out, tmp;
9773 if (BRANCH_COST <= 2)
9774 return 0; /* FAIL */
9776 /* If one of the two operands is an interesting constant, load a
9777 constant with the above and mask it in with a logical operation. */
9779 if (GET_CODE (operands[2]) == CONST_INT)
9782 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
9783 operands[3] = constm1_rtx, op = and_optab;
9784 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
9785 operands[3] = const0_rtx, op = ior_optab;
9787 return 0; /* FAIL */
9789 else if (GET_CODE (operands[3]) == CONST_INT)
9792 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
9793 operands[2] = constm1_rtx, op = and_optab;
9794 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
9795 operands[2] = const0_rtx, op = ior_optab;
9797 return 0; /* FAIL */
9800 return 0; /* FAIL */
9802 orig_out = operands[0];
9803 tmp = gen_reg_rtx (mode);
9806 /* Recurse to get the constant loaded. */
9807 if (ix86_expand_int_movcc (operands) == 0)
9808 return 0; /* FAIL */
9810 /* Mask in the interesting variable. */
9811 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9813 if (!rtx_equal_p (out, orig_out))
9814 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
9816 return 1; /* DONE */
9820 * For comparison with above,
/* Fall-through cmov path: force operands into forms cmov accepts and
   emit up to three conditional-move insns (main + second/bypass tests).  */
9830 if (! nonimmediate_operand (operands[2], mode))
9831 operands[2] = force_reg (mode, operands[2]);
9832 if (! nonimmediate_operand (operands[3], mode))
9833 operands[3] = force_reg (mode, operands[3]);
9835 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9837 rtx tmp = gen_reg_rtx (mode);
9838 emit_move_insn (tmp, operands[3]);
9841 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9843 rtx tmp = gen_reg_rtx (mode);
9844 emit_move_insn (tmp, operands[2]);
9848 if (! register_operand (operands[2], VOIDmode)
9850 || ! register_operand (operands[3], VOIDmode)))
9851 operands[2] = force_reg (mode, operands[2]);
9854 && ! register_operand (operands[3], VOIDmode))
9855 operands[3] = force_reg (mode, operands[3]);
9857 emit_insn (compare_seq);
9858 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9859 gen_rtx_IF_THEN_ELSE (mode,
9860 compare_op, operands[2],
9863 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9864 gen_rtx_IF_THEN_ELSE (mode,
9866 copy_rtx (operands[3]),
9867 copy_rtx (operands[0]))));
9869 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9870 gen_rtx_IF_THEN_ELSE (mode,
9872 copy_rtx (operands[2]),
9873 copy_rtx (operands[0]))));
9875 return 1; /* DONE */
/* Expand a floating-point conditional move.  Tries SSE min/max and SSE
   masked cmov forms first when the target supports them, then falls back
   to fcmov (x87 conditional move), materializing a setcc first when the
   condition is not directly representable.
   NOTE(review): numbering gaps -- interior lines are missing from this
   fragment.  */
9879 ix86_expand_fp_movcc (operands)
9884 rtx compare_op, second_test, bypass_test;
9886 /* For SF/DFmode conditional moves based on comparisons
9887 in same mode, we may want to use SSE min/max instructions. */
9888 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9889 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
9890 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
9891 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
9893 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
9894 /* We may be called from the post-reload splitter. */
9895 && (!REG_P (operands[0])
9896 || SSE_REG_P (operands[0])
9897 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
9899 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9900 code = GET_CODE (operands[1]);
9902 /* See if we have (cross) match between comparison operands and
9903 conditional move operands. */
9904 if (rtx_equal_p (operands[2], op1))
9909 code = reverse_condition_maybe_unordered (code);
9911 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9913 /* Check for min operation. */
9914 if (code == LT || code == UNLE)
9922 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9923 if (memory_operand (op0, VOIDmode))
9924 op0 = force_reg (GET_MODE (operands[0]), op0);
9925 if (GET_MODE (operands[0]) == SFmode)
9926 emit_insn (gen_minsf3 (operands[0], op0, op1));
9928 emit_insn (gen_mindf3 (operands[0], op0, op1));
9931 /* Check for max operation. */
9932 if (code == GT || code == UNGE)
9940 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9941 if (memory_operand (op0, VOIDmode))
9942 op0 = force_reg (GET_MODE (operands[0]), op0);
9943 if (GET_MODE (operands[0]) == SFmode)
9944 emit_insn (gen_maxsf3 (operands[0], op0, op1));
9946 emit_insn (gen_maxdf3 (operands[0], op0, op1));
9950 /* Manage condition to be sse_comparison_operator. In case we are
9951 in non-ieee mode, try to canonicalize the destination operand
9952 to be first in the comparison - this helps reload to avoid extra
9954 if (!sse_comparison_operator (operands[1], VOIDmode)
9955 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
9957 rtx tmp = ix86_compare_op0;
9958 ix86_compare_op0 = ix86_compare_op1;
9959 ix86_compare_op1 = tmp;
9960 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
9961 VOIDmode, ix86_compare_op0,
9964 /* Similarly try to manage result to be first operand of conditional
9965 move. We also don't support the NE comparison on SSE, so try to
9967 if ((rtx_equal_p (operands[0], operands[3])
9968 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
9969 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
9971 rtx tmp = operands[2];
9972 operands[2] = operands[3];
9974 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9975 (GET_CODE (operands[1])),
9976 VOIDmode, ix86_compare_op0,
9979 if (GET_MODE (operands[0]) == SFmode)
9980 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
9981 operands[2], operands[3],
9982 ix86_compare_op0, ix86_compare_op1));
9984 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
9985 operands[2], operands[3],
9986 ix86_compare_op0, ix86_compare_op1));
9990 /* The floating point conditional move instructions don't directly
9991 support conditions resulting from a signed integer comparison. */
9993 code = GET_CODE (operands[1]);
9994 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9996 /* The floating point conditional move instructions don't directly
9997 support signed integer comparisons. */
/* If fcmov cannot encode the condition, compute the flag into a QImode
   register with setcc and compare that against zero instead.  */
9999 if (!fcmov_comparison_operator (compare_op, VOIDmode))
10001 if (second_test != NULL || bypass_test != NULL)
10003 tmp = gen_reg_rtx (QImode);
10004 ix86_expand_setcc (code, tmp);
10006 ix86_compare_op0 = tmp;
10007 ix86_compare_op1 = const0_rtx;
10008 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10010 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10012 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10013 emit_move_insn (tmp, operands[3]);
10016 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10018 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10019 emit_move_insn (tmp, operands[2]);
10023 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10024 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10029 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10030 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10035 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10036 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10044 /* Expand conditional increment or decrement using adc/sbb instructions.
10045 The default case using setcc followed by the conditional move can be
10046 done by generic code. */
/* NOTE(review): numbering gaps -- the return-type line, braces and
   early-return bodies are missing from this fragment.  */
10048 ix86_expand_int_addcc (operands)
10051 enum rtx_code code = GET_CODE (operands[1]);
10053 rtx val = const0_rtx;
/* Only +1/-1 adjustments can be done with adc/sbb.  */
10055 if (operands[3] != const1_rtx
10056 && operands[3] != constm1_rtx)
10058 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10059 ix86_compare_op1, &compare_op))
10061 if (GET_CODE (compare_op) != LTU)
/* Choose sub-with-borrow vs. add-with-carry depending on whether the
   carry sense matches the sign of the adjustment.  */
10063 if ((GET_CODE (compare_op) == LTU) == (operands[3] == constm1_rtx))
10065 switch (GET_MODE (operands[0]))
10068 emit_insn (gen_subqi3_carry (operands[0], operands[2], val));
10071 emit_insn (gen_subhi3_carry (operands[0], operands[2], val));
10074 emit_insn (gen_subsi3_carry (operands[0], operands[2], val));
10077 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val));
10085 switch (GET_MODE (operands[0]))
10088 emit_insn (gen_addqi3_carry (operands[0], operands[2], val));
10091 emit_insn (gen_addhi3_carry (operands[0], operands[2], val));
10094 emit_insn (gen_addsi3_carry (operands[0], operands[2], val));
10097 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val));
10103 return 1; /* DONE */
10107 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10108 works for floating point parameters and nonoffsetable memories.
10109 For pushes, it returns just stack offsets; the values will be saved
10110 in the right order. Maximally three parts are generated. */
/* NOTE(review): numbering gaps -- declarations, braces and the return
   statement are missing from this fragment.  */
10113 ix86_split_to_parts (operand, parts, mode)
10116 enum machine_mode mode;
/* Part size: 32-bit targets split into 4-byte parts (TFmode -> 3),
   64-bit targets into 8-byte parts.  */
10121 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
10123 size = (GET_MODE_SIZE (mode) + 4) / 8;
10125 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10127 if (size < 2 || size > 3)
10130 /* Optimize constant pool reference to immediates. This is used by fp
10131 moves, that force all constants to memory to allow combining. */
10132 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10134 rtx tmp = maybe_get_pool_constant (operand);
10139 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10141 /* The only non-offsetable memories we handle are pushes. */
10142 if (! push_operand (operand, VOIDmode))
10145 operand = copy_rtx (operand);
10146 PUT_MODE (operand, Pmode);
10147 parts[0] = parts[1] = parts[2] = operand;
10149 else if (!TARGET_64BIT)
10151 if (mode == DImode)
10152 split_di (&operand, 1, &parts[0], &parts[1]);
10155 if (REG_P (operand))
10157 if (!reload_completed)
10159 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10160 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10162 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10164 else if (offsettable_memref_p (operand))
10166 operand = adjust_address (operand, SImode, 0);
10167 parts[0] = operand;
10168 parts[1] = adjust_address (operand, SImode, 4);
10170 parts[2] = adjust_address (operand, SImode, 8);
10172 else if (GET_CODE (operand) == CONST_DOUBLE)
10177 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10182 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10183 parts[2] = gen_int_mode (l[2], SImode);
10186 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10191 parts[1] = gen_int_mode (l[1], SImode);
10192 parts[0] = gen_int_mode (l[0], SImode);
/* 64-bit target: split into DImode (low) plus SImode tail for XF/TF.  */
10200 if (mode == TImode)
10201 split_ti (&operand, 1, &parts[0], &parts[1]);
10202 if (mode == XFmode || mode == TFmode)
10204 if (REG_P (operand))
10206 if (!reload_completed)
10208 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10209 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10211 else if (offsettable_memref_p (operand))
10213 operand = adjust_address (operand, DImode, 0);
10214 parts[0] = operand;
10215 parts[1] = adjust_address (operand, SImode, 8);
10217 else if (GET_CODE (operand) == CONST_DOUBLE)
10222 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10223 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10224 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10225 if (HOST_BITS_PER_WIDE_INT >= 64)
10228 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10229 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10232 parts[0] = immed_double_const (l[0], l[1], DImode);
10233 parts[1] = gen_int_mode (l[2], SImode);
10243 /* Emit insns to perform a move or push of DI, DF, and XF values.
10244 Return false when normal moves are needed; true when all required
10245 insns have been emitted. Operands 2-4 contain the input values
10246 in the correct order; operands 5-7 contain the output values. */
/* NOTE(review): numbering gaps -- declarations (e.g. `part`, `push`,
   `nparts`), braces and several statements are missing from this
   fragment.  */
10249 ix86_split_long_move (operands)
10255 int collisions = 0;
10256 enum machine_mode mode = GET_MODE (operands[0]);
10258 /* The DFmode expanders may ask us to move double.
10259 For 64bit target this is single move. By hiding the fact
10260 here we simplify i386.md splitters. */
10261 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10263 /* Optimize constant pool reference to immediates. This is used by
10264 fp moves, that force all constants to memory to allow combining. */
10266 if (GET_CODE (operands[1]) == MEM
10267 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10268 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10269 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10270 if (push_operand (operands[0], VOIDmode))
10272 operands[0] = copy_rtx (operands[0]);
10273 PUT_MODE (operands[0], Pmode);
10276 operands[0] = gen_lowpart (DImode, operands[0]);
10277 operands[1] = gen_lowpart (DImode, operands[1]);
10278 emit_move_insn (operands[0], operands[1]);
10282 /* The only non-offsettable memory we handle is push. */
10283 if (push_operand (operands[0], VOIDmode))
10285 else if (GET_CODE (operands[0]) == MEM
10286 && ! offsettable_memref_p (operands[0]))
10289 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10290 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10292 /* When emitting push, take care for source operands on the stack. */
10293 if (push && GET_CODE (operands[1]) == MEM
10294 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10297 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10298 XEXP (part[1][2], 0));
10299 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10300 XEXP (part[1][1], 0));
10303 /* We need to do copy in the right order in case an address register
10304 of the source overlaps the destination. */
10305 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10307 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10309 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10312 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10315 /* Collision in the middle part can be handled by reordering. */
10316 if (collisions == 1 && nparts == 3
10317 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10320 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10321 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10324 /* If there are more collisions, we can't handle it by reordering.
10325 Do an lea to the last part and use only one colliding move. */
10326 else if (collisions > 1)
10329 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
10330 XEXP (part[1][0], 0)));
10331 part[1][0] = change_address (part[1][0],
10332 TARGET_64BIT ? DImode : SImode,
10333 part[0][nparts - 1]);
10334 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
10336 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
10346 /* We use only first 12 bytes of TFmode value, but for pushing we
10347 are required to adjust stack as if we were pushing real 16byte
10349 if (mode == TFmode && !TARGET_64BIT)
10350 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
10352 emit_move_insn (part[0][2], part[1][2]);
10357 /* In 64bit mode we don't have 32bit push available. In case this is
10358 register, it is OK - we will just use larger counterpart. We also
10359 retype memory - these come from attempt to avoid REX prefix on
10360 moving of second half of TFmode value. */
10361 if (GET_MODE (part[1][1]) == SImode)
10363 if (GET_CODE (part[1][1]) == MEM)
10364 part[1][1] = adjust_address (part[1][1], DImode, 0);
10365 else if (REG_P (part[1][1]))
10366 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10369 if (GET_MODE (part[1][0]) == SImode)
10370 part[1][0] = part[1][1];
10373 emit_move_insn (part[0][1], part[1][1]);
10374 emit_move_insn (part[0][0], part[1][0]);
10378 /* Choose correct order to not overwrite the source before it is copied. */
10379 if ((REG_P (part[0][0])
10380 && REG_P (part[1][1])
10381 && (REGNO (part[0][0]) == REGNO (part[1][1])
10383 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10385 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
/* High-to-low copy order (destination low word would clobber source).  */
10389 operands[2] = part[0][2];
10390 operands[3] = part[0][1];
10391 operands[4] = part[0][0];
10392 operands[5] = part[1][2];
10393 operands[6] = part[1][1];
10394 operands[7] = part[1][0];
10398 operands[2] = part[0][1];
10399 operands[3] = part[0][0];
10400 operands[5] = part[1][1];
10401 operands[6] = part[1][0];
/* Low-to-high copy order (no overlap hazard).  */
10408 operands[2] = part[0][0];
10409 operands[3] = part[0][1];
10410 operands[4] = part[0][2];
10411 operands[5] = part[1][0];
10412 operands[6] = part[1][1];
10413 operands[7] = part[1][2];
10417 operands[2] = part[0][0];
10418 operands[3] = part[0][1];
10419 operands[5] = part[1][0];
10420 operands[6] = part[1][1];
10423 emit_move_insn (operands[2], operands[5]);
10424 emit_move_insn (operands[3], operands[6]);
10426 emit_move_insn (operands[4], operands[7]);
/* Split a 64-bit left shift into SImode operations (shld + shl, with a
   cmov/adjustment for variable counts >= 32).  SCRATCH, when non-null,
   is a spare SImode register usable after reload.
   NOTE(review): numbering gaps -- braces and some else-branches are
   missing from this fragment.  */
10432 ix86_split_ashldi (operands, scratch)
10433 rtx *operands, scratch;
10435 rtx low[2], high[2];
10438 if (GET_CODE (operands[2]) == CONST_INT)
10440 split_di (operands, 2, low, high);
10441 count = INTVAL (operands[2]) & 63;
/* count >= 32: low word shifts entirely into the high word.  */
10445 emit_move_insn (high[0], low[1]);
10446 emit_move_insn (low[0], const0_rtx);
10449 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10453 if (!rtx_equal_p (operands[0], operands[1]))
10454 emit_move_insn (operands[0], operands[1]);
10455 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10456 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
/* Variable count: emit shld/shl, then fix up the >= 32 case.  */
10461 if (!rtx_equal_p (operands[0], operands[1]))
10462 emit_move_insn (operands[0], operands[1]);
10464 split_di (operands, 1, low, high);
10466 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10467 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10469 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10471 if (! no_new_pseudos)
10472 scratch = force_reg (SImode, const0_rtx);
10474 emit_move_insn (scratch, const0_rtx);
10476 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10480 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
/* Split a 64-bit arithmetic right shift into SImode operations
   (shrd + sar, sign-filling the high word for counts >= 32).
   NOTE(review): numbering gaps -- braces and some branches are missing
   from this fragment.  */
10485 ix86_split_ashrdi (operands, scratch)
10486 rtx *operands, scratch;
10488 rtx low[2], high[2];
10491 if (GET_CODE (operands[2]) == CONST_INT)
10493 split_di (operands, 2, low, high);
10494 count = INTVAL (operands[2]) & 63;
/* count >= 32: high word moves to low; high word becomes sign bits.  */
10498 emit_move_insn (low[0], high[1]);
10500 if (! reload_completed)
10501 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10504 emit_move_insn (high[0], low[0]);
10505 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10509 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10513 if (!rtx_equal_p (operands[0], operands[1]))
10514 emit_move_insn (operands[0], operands[1]);
10515 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10516 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
/* Variable count: emit shrd/sar, then fix up the >= 32 case; the
   scratch register holds the sign extension for the cmov adjustment.  */
10521 if (!rtx_equal_p (operands[0], operands[1]))
10522 emit_move_insn (operands[0], operands[1]);
10524 split_di (operands, 1, low, high);
10526 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10527 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10529 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10531 if (! no_new_pseudos)
10532 scratch = gen_reg_rtx (SImode);
10533 emit_move_insn (scratch, high[0]);
10534 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10535 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10539 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
/* Split a 64-bit logical right shift into SImode operations
   (shrd + shr, zero-filling the high word for counts >= 32).
   NOTE(review): numbering gaps -- braces and some branches are missing
   from this fragment.  */
10544 ix86_split_lshrdi (operands, scratch)
10545 rtx *operands, scratch;
10547 rtx low[2], high[2];
10550 if (GET_CODE (operands[2]) == CONST_INT)
10552 split_di (operands, 2, low, high);
10553 count = INTVAL (operands[2]) & 63;
/* count >= 32: high word moves to low; high word becomes zero.  */
10557 emit_move_insn (low[0], high[1]);
10558 emit_move_insn (high[0], const0_rtx);
10561 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10565 if (!rtx_equal_p (operands[0], operands[1]))
10566 emit_move_insn (operands[0], operands[1]);
10567 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10568 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
/* Variable count: emit shrd/shr, then fix up the >= 32 case.  */
10573 if (!rtx_equal_p (operands[0], operands[1]))
10574 emit_move_insn (operands[0], operands[1]);
10576 split_di (operands, 1, low, high);
10578 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10579 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10581 /* Heh. By reversing the arguments, we can reuse this pattern. */
10582 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10584 if (! no_new_pseudos)
10585 scratch = force_reg (SImode, const0_rtx);
10587 emit_move_insn (scratch, const0_rtx);
10589 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10593 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10597 /* Helper function for the string operations below. Test whether VARIABLE
10598 is aligned to VALUE bytes. If true, jump to the label. */
/* Returns the freshly generated label; the caller emits it after the
   unaligned-handling code.  (Return-type line missing from listing.)  */
10600 ix86_expand_aligntest (variable, value)
10604 rtx label = gen_label_rtx ();
10605 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
/* tmpcount = variable & value; jump to label when the masked bits are 0.  */
10606 if (GET_MODE (variable) == DImode)
10607 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10609 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10610 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10615 /* Adjust COUNTER by the VALUE. */
/* Emits countreg -= value in countreg's own mode (DImode or SImode).  */
10617 ix86_adjust_counter (countreg, value)
10619 HOST_WIDE_INT value;
10621 if (GET_MODE (countreg) == DImode)
10622 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10624 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10627 /* Zero extend possibly SImode EXP to Pmode register. */
/* Returns a fresh Pmode register holding EXP: constants are forced into
   a register, Pmode values are copied, and SImode values are
   zero-extended to DImode (only reachable when Pmode is DImode).  */
10629 ix86_zero_extend_to_Pmode (exp)
10633 if (GET_MODE (exp) == VOIDmode)
10634 return force_reg (Pmode, exp);
10635 if (GET_MODE (exp) == Pmode)
10636 return copy_to_mode_reg (Pmode, exp);
10637 r = gen_reg_rtx (Pmode);
10638 emit_insn (gen_zero_extendsidi2 (r, exp));
10642 /* Expand string move (memcpy) operation. Use i386 string operations when
10643 profitable. expand_clrstr contains similar code. */
/* Expands a block move DST <- SRC of COUNT_EXP bytes with known alignment
   ALIGN_EXP.  Chooses among three strategies:
     1. rep movsb      -- when optimizing for size and count not 4-divisible;
     2. rep movsl/movsq plus tail copies -- constant or well-aligned counts;
     3. generic glibc-style code -- align destination, rep movsl, copy rest.
   NOTE(review): many interior lines (braces, else-arms, early returns) are
   elided in this extraction; the embedded original line numbers skip.  */
10645 ix86_expand_movstr (dst, src, count_exp, align_exp)
10646 rtx dst, src, count_exp, align_exp;
10648 rtx srcreg, destreg, countreg;
10649 enum machine_mode counter_mode;
10650 HOST_WIDE_INT align = 0;
10651 unsigned HOST_WIDE_INT count = 0;
10655 if (GET_CODE (align_exp) == CONST_INT)
10656 align = INTVAL (align_exp);
10658 /* This simple hack avoids all inlining code and simplifies code below. */
10659 if (!TARGET_ALIGN_STRINGOPS)
/* Presumably forces align to a large value or bails out — body elided.  */
10662 if (GET_CODE (count_exp) == CONST_INT)
10664 count = INTVAL (count_exp);
10665 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
10669 /* Figure out proper mode for counter. For 32bits it is always SImode,
10670 for 64bits use SImode when possible, otherwise DImode.
10671 Set count to number of bytes copied when known at compile time. */
10672 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10673 || x86_64_zero_extended_value (count_exp))
10674 counter_mode = SImode;
10676 counter_mode = DImode;
10680 if (counter_mode != SImode && counter_mode != DImode)
/* Defensive check — the two assignments above make this unreachable.  */
10683 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10684 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
/* Clear the direction flag so the string insns move forward.  */
10686 emit_insn (gen_cld ());
10688 /* When optimizing for size emit simple rep ; movsb instruction for
10689 counts not divisible by 4. */
10691 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10693 countreg = ix86_zero_extend_to_Pmode (count_exp);
10695 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
10696 destreg, srcreg, countreg));
10698 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
10699 destreg, srcreg, countreg));
10702 /* For constant aligned (or small unaligned) copies use rep movsl
10703 followed by code copying the rest. For PentiumPro ensure 8 byte
10704 alignment to allow rep movsl acceleration. */
10706 else if (count != 0
10708 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10709 || optimize_size || count < (unsigned int) 64))
/* Word size for the bulk copy: 8 bytes on x86-64 unless sizing for space.  */
10711 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10712 if (count & ~(size - 1))
/* count >> 2 (or >> 3) word iterations; the 32-bit mask guards against
   a negative HOST_WIDE_INT shift result on 32-bit hosts.  */
10714 countreg = copy_to_mode_reg (counter_mode,
10715 GEN_INT ((count >> (size == 4 ? 2 : 3))
10716 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10717 countreg = ix86_zero_extend_to_Pmode (countreg);
10721 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
10722 destreg, srcreg, countreg));
10724 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
10725 destreg, srcreg, countreg));
10728 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
10729 destreg, srcreg, countreg));
/* Tail: copy the remaining <size bytes with single string moves.  */
10731 if (size == 8 && (count & 0x04))
10732 emit_insn (gen_strmovsi (destreg, srcreg));
10734 emit_insn (gen_strmovhi (destreg, srcreg));
10736 emit_insn (gen_strmovqi (destreg, srcreg));
10738 /* The generic code based on the glibc implementation:
10739 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10740 allowing accelerated copying there)
10741 - copy the data using rep movsl
10742 - copy the rest. */
10747 int desired_alignment = (TARGET_PENTIUMPRO
10748 && (count == 0 || count >= (unsigned int) 260)
10749 ? 8 : UNITS_PER_WORD);
10751 /* In case we don't know anything about the alignment, default to
10752 library version, since it is usually equally fast and result in
10755 Also emit call when we know that the count is large and call overhead
10756 will not be important. */
10757 if (!TARGET_INLINE_ALL_STRINGOPS
10758 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
10764 if (TARGET_SINGLE_STRINGOP)
10765 emit_insn (gen_cld ());
10767 countreg2 = gen_reg_rtx (Pmode);
10768 countreg = copy_to_mode_reg (counter_mode, count_exp);
10770 /* We don't use loops to align destination and to copy parts smaller
10771 than 4 bytes, because gcc is able to optimize such code better (in
10772 the case the destination or the count really is aligned, gcc is often
10773 able to predict the branches) and also it is friendlier to the
10774 hardware branch prediction.
10776 Using loops is beneficial for generic case, because we can
10777 handle small counts using the loops. Many CPUs (such as Athlon)
10778 have large REP prefix setup costs.
10780 This is quite costly. Maybe we can revisit this decision later or
10781 add some customizability to this code. */
/* If the count may be smaller than the alignment prologue would consume,
   jump over the alignment code entirely.  */
10783 if (count == 0 && align < desired_alignment)
10785 label = gen_label_rtx ();
10786 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10787 LEU, 0, counter_mode, 1, label);
/* Alignment prologue: copy 1, 2, then 4 bytes as needed to reach
   desired_alignment, decrementing the live count each time.  */
10791 rtx label = ix86_expand_aligntest (destreg, 1);
10792 emit_insn (gen_strmovqi (destreg, srcreg));
10793 ix86_adjust_counter (countreg, 1);
10794 emit_label (label);
10795 LABEL_NUSES (label) = 1;
10799 rtx label = ix86_expand_aligntest (destreg, 2);
10800 emit_insn (gen_strmovhi (destreg, srcreg));
10801 ix86_adjust_counter (countreg, 2);
10802 emit_label (label);
10803 LABEL_NUSES (label) = 1;
10805 if (align <= 4 && desired_alignment > 4)
10807 rtx label = ix86_expand_aligntest (destreg, 4);
10808 emit_insn (gen_strmovsi (destreg, srcreg));
10809 ix86_adjust_counter (countreg, 4);
10810 emit_label (label);
10811 LABEL_NUSES (label) = 1;
10814 if (label && desired_alignment > 4 && !TARGET_64BIT)
10816 emit_label (label);
10817 LABEL_NUSES (label) = 1;
10820 if (!TARGET_SINGLE_STRINGOP)
10821 emit_insn (gen_cld ());
/* Main body: shift byte count down to word count, then rep move.  */
10824 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10826 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
10827 destreg, srcreg, countreg2));
10831 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10832 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
10833 destreg, srcreg, countreg2));
10838 emit_label (label);
10839 LABEL_NUSES (label) = 1;
/* Epilogue: copy the remaining 0-7 bytes, testing COUNTREG bits at
   runtime when the count is not a compile-time constant.  */
10841 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10842 emit_insn (gen_strmovsi (destreg, srcreg));
10843 if ((align <= 4 || count == 0) && TARGET_64BIT)
10845 rtx label = ix86_expand_aligntest (countreg, 4);
10846 emit_insn (gen_strmovsi (destreg, srcreg));
10847 emit_label (label);
10848 LABEL_NUSES (label) = 1;
10850 if (align > 2 && count != 0 && (count & 2))
10851 emit_insn (gen_strmovhi (destreg, srcreg));
10852 if (align <= 2 || count == 0)
10854 rtx label = ix86_expand_aligntest (countreg, 2);
10855 emit_insn (gen_strmovhi (destreg, srcreg));
10856 emit_label (label);
10857 LABEL_NUSES (label) = 1;
10859 if (align > 1 && count != 0 && (count & 1))
10860 emit_insn (gen_strmovqi (destreg, srcreg));
10861 if (align <= 1 || count == 0)
10863 rtx label = ix86_expand_aligntest (countreg, 1);
10864 emit_insn (gen_strmovqi (destreg, srcreg));
10865 emit_label (label);
10866 LABEL_NUSES (label) = 1;
/* Attach memory attributes of DST/SRC to the generated insns.  */
10870 insns = get_insns ();
10873 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
10878 /* Expand string clear operation (bzero). Use i386 string operations when
10879 profitable. expand_movstr contains similar code. */
/* Zeroes COUNT_EXP bytes at SRC (the destination, despite the name) with
   alignment ALIGN_EXP.  Mirrors ix86_expand_movstr's three strategies but
   uses rep stos* / strset* with a zero register instead of moves.
   NOTE(review): interior lines (braces, else-arms, returns) are elided in
   this extraction; embedded original line numbers skip.  */
10881 ix86_expand_clrstr (src, count_exp, align_exp)
10882 rtx src, count_exp, align_exp;
10884 rtx destreg, zeroreg, countreg;
10885 enum machine_mode counter_mode;
10886 HOST_WIDE_INT align = 0;
10887 unsigned HOST_WIDE_INT count = 0;
10889 if (GET_CODE (align_exp) == CONST_INT)
10890 align = INTVAL (align_exp);
10892 /* This simple hack avoids all inlining code and simplifies code below. */
10893 if (!TARGET_ALIGN_STRINGOPS)
10896 if (GET_CODE (count_exp) == CONST_INT)
10898 count = INTVAL (count_exp);
10899 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
10902 /* Figure out proper mode for counter. For 32bits it is always SImode,
10903 for 64bits use SImode when possible, otherwise DImode.
10904 Set count to number of bytes copied when known at compile time. */
10905 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10906 || x86_64_zero_extended_value (count_exp))
10907 counter_mode = SImode;
10909 counter_mode = DImode;
10911 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
/* Clear the direction flag so string stores move forward.  */
10913 emit_insn (gen_cld ());
10915 /* When optimizing for size emit simple rep ; movsb instruction for
10916 counts not divisible by 4. */
10918 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10920 countreg = ix86_zero_extend_to_Pmode (count_exp);
10921 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
10923 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
10924 destreg, countreg));
10926 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
10927 destreg, countreg));
/* Constant (or well-aligned small) counts: bulk rep stosl/stosq then
   explicit stores for the tail.  */
10929 else if (count != 0
10931 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10932 || optimize_size || count < (unsigned int) 64))
10934 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10935 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
10936 if (count & ~(size - 1))
10938 countreg = copy_to_mode_reg (counter_mode,
10939 GEN_INT ((count >> (size == 4 ? 2 : 3))
10940 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10941 countreg = ix86_zero_extend_to_Pmode (countreg);
10945 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
10946 destreg, countreg));
10948 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
10949 destreg, countreg));
10952 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
10953 destreg, countreg));
/* Tail: remaining <size bytes via narrowing SUBREGs of the zero reg.  */
10955 if (size == 8 && (count & 0x04))
10956 emit_insn (gen_strsetsi (destreg,
10957 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10959 emit_insn (gen_strsethi (destreg,
10960 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10962 emit_insn (gen_strsetqi (destreg,
10963 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10969 /* Compute desired alignment of the string operation. */
10970 int desired_alignment = (TARGET_PENTIUMPRO
10971 && (count == 0 || count >= (unsigned int) 260)
10972 ? 8 : UNITS_PER_WORD);
10974 /* In case we don't know anything about the alignment, default to
10975 library version, since it is usually equally fast and result in
10978 Also emit call when we know that the count is large and call overhead
10979 will not be important. */
10980 if (!TARGET_INLINE_ALL_STRINGOPS
10981 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
10984 if (TARGET_SINGLE_STRINGOP)
10985 emit_insn (gen_cld ());
10987 countreg2 = gen_reg_rtx (Pmode);
10988 countreg = copy_to_mode_reg (counter_mode, count_exp);
10989 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
/* Skip the alignment prologue if the runtime count is too small.  */
10991 if (count == 0 && align < desired_alignment)
10993 label = gen_label_rtx ();
10994 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10995 LEU, 0, counter_mode, 1, label);
/* Alignment prologue: store 1, 2, then 4 zero bytes as needed.  */
10999 rtx label = ix86_expand_aligntest (destreg, 1);
11000 emit_insn (gen_strsetqi (destreg,
11001 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11002 ix86_adjust_counter (countreg, 1);
11003 emit_label (label);
11004 LABEL_NUSES (label) = 1;
11008 rtx label = ix86_expand_aligntest (destreg, 2);
11009 emit_insn (gen_strsethi (destreg,
11010 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11011 ix86_adjust_counter (countreg, 2);
11012 emit_label (label);
11013 LABEL_NUSES (label) = 1;
11015 if (align <= 4 && desired_alignment > 4)
11017 rtx label = ix86_expand_aligntest (destreg, 4);
11018 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
11019 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11021 ix86_adjust_counter (countreg, 4);
11022 emit_label (label);
11023 LABEL_NUSES (label) = 1;
11026 if (label && desired_alignment > 4 && !TARGET_64BIT)
11028 emit_label (label);
11029 LABEL_NUSES (label) = 1;
11033 if (!TARGET_SINGLE_STRINGOP)
11034 emit_insn (gen_cld ());
/* Main body: word count = byte count >> 2 (or >> 3), then rep stos.  */
11037 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11039 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
11040 destreg, countreg2));
11044 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
11045 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
11046 destreg, countreg2));
11050 emit_label (label);
11051 LABEL_NUSES (label) = 1;
/* Epilogue: clear the remaining 0-7 bytes, branching on COUNTREG bits
   when the byte count is not known at compile time.  */
11054 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11055 emit_insn (gen_strsetsi (destreg,
11056 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11057 if (TARGET_64BIT && (align <= 4 || count == 0))
11059 rtx label = ix86_expand_aligntest (countreg, 4);
11060 emit_insn (gen_strsetsi (destreg,
11061 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11062 emit_label (label);
11063 LABEL_NUSES (label) = 1;
11065 if (align > 2 && count != 0 && (count & 2))
11066 emit_insn (gen_strsethi (destreg,
11067 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11068 if (align <= 2 || count == 0)
11070 rtx label = ix86_expand_aligntest (countreg, 2);
11071 emit_insn (gen_strsethi (destreg,
11072 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11073 emit_label (label);
11074 LABEL_NUSES (label) = 1;
11076 if (align > 1 && count != 0 && (count & 1))
11077 emit_insn (gen_strsetqi (destreg,
11078 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11079 if (align <= 1 || count == 0)
11081 rtx label = ix86_expand_aligntest (countreg, 1);
11082 emit_insn (gen_strsetqi (destreg,
11083 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11084 emit_label (label);
11085 LABEL_NUSES (label) = 1;
11090 /* Expand strlen. */
/* Expands strlen(SRC) into OUT.  Two strategies:
   - unrolled word-at-a-time scan (ix86_expand_strlensi_unroll_1) when
     TARGET_UNROLL_STRLEN applies and the terminator is NUL;
   - otherwise `repnz scasb` via the strlenqi patterns, then ~count - 1
     to recover the length.
   NOTE(review): several interior lines (returns, braces, else-arms) are
   elided in this extraction.  */
11092 ix86_expand_strlen (out, src, eoschar, align)
11093 rtx out, src, eoschar, align;
11095 rtx addr, scratch1, scratch2, scratch3, scratch4;
11097 /* The generic case of strlen expander is long. Avoid its
11098 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
11100 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11101 && !TARGET_INLINE_ALL_STRINGOPS
11103 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11106 addr = force_reg (Pmode, XEXP (src, 0));
11107 scratch1 = gen_reg_rtx (Pmode);
11109 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11112 /* Well it seems that some optimizer does not combine a call like
11113 foo(strlen(bar), strlen(bar));
11114 when the move and the subtraction is done here. It does calculate
11115 the length just once when these instructions are done inside of
11116 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
11117 often used and I use one fewer register for the lifetime of
11118 output_strlen_unroll() this is better. */
11120 emit_move_insn (out, addr);
11122 ix86_expand_strlensi_unroll_1 (out, align);
11124 /* strlensi_unroll_1 returns the address of the zero at the end of
11125 the string, like memchr(), so compute the length by subtracting
11126 the start address. */
11128 emit_insn (gen_subdi3 (out, out, addr));
11130 emit_insn (gen_subsi3 (out, out, addr));
/* repnz scasb path: scratch4 = -1 preloads ECX for the scan.  */
11134 scratch2 = gen_reg_rtx (Pmode);
11135 scratch3 = gen_reg_rtx (Pmode);
11136 scratch4 = force_reg (Pmode, constm1_rtx);
11138 emit_move_insn (scratch3, addr);
11139 eoschar = force_reg (QImode, eoschar);
11141 emit_insn (gen_cld ());
11144 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
11145 align, scratch4, scratch3));
/* Length = ~remaining_count - 1 (scasb leaves -(len+2) in the count).  */
11146 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11147 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11151 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
11152 align, scratch4, scratch3));
11153 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11154 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11160 /* Expand the appropriate insns for doing strlen if not just doing
11163 out = result, initialized with the start address
11164 align_rtx = alignment of the address.
11165 scratch = scratch register, initialized with the startaddress when
11166 not aligned, otherwise undefined
11168 This is just the body. It needs the initialisations mentioned above and
11169 some address computing at the end. These things are done in i386.md. */
/* On return, OUT holds the address of the terminating NUL (like memchr);
   the caller subtracts the start address to get the length.
   NOTE(review): interior lines (labels for the branch targets, braces,
   and the final return) are elided in this extraction.  */
11172 ix86_expand_strlensi_unroll_1 (out, align_rtx)
11173 rtx out, align_rtx;
11177 rtx align_2_label = NULL_RTX;
11178 rtx align_3_label = NULL_RTX;
11179 rtx align_4_label = gen_label_rtx ();
11180 rtx end_0_label = gen_label_rtx ();
11182 rtx tmpreg = gen_reg_rtx (SImode);
11183 rtx scratch = gen_reg_rtx (SImode);
11186 if (GET_CODE (align_rtx) == CONST_INT)
11187 align = INTVAL (align_rtx);
11189 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11191 /* Is there a known alignment and is it less than 4? */
11194 rtx scratch1 = gen_reg_rtx (Pmode);
11195 emit_move_insn (scratch1, out);
11196 /* Is there a known alignment and is it not 2? */
11199 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11200 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11202 /* Leave just the 3 lower bits. */
11203 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11204 NULL_RTX, 0, OPTAB_WIDEN);
/* Dispatch on addr & 3: 0 -> aligned, 2 -> 2 bytes to check,
   3 -> 1 byte, 1 -> fall through to check up to 3 bytes.  */
11206 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11207 Pmode, 1, align_4_label);
11208 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
11209 Pmode, 1, align_2_label);
11210 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
11211 Pmode, 1, align_3_label);
11215 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11216 check if it is aligned to 4 - byte. */
11218 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
11219 NULL_RTX, 0, OPTAB_WIDEN);
11221 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11222 Pmode, 1, align_4_label);
11225 mem = gen_rtx_MEM (QImode, out);
11227 /* Now compare the bytes. */
11229 /* Compare the first n unaligned byte on a byte per byte basis. */
11230 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11231 QImode, 1, end_0_label);
11233 /* Increment the address. */
11235 emit_insn (gen_adddi3 (out, out, const1_rtx));
11237 emit_insn (gen_addsi3 (out, out, const1_rtx));
11239 /* Not needed with an alignment of 2 */
11242 emit_label (align_2_label);
11244 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11248 emit_insn (gen_adddi3 (out, out, const1_rtx));
11250 emit_insn (gen_addsi3 (out, out, const1_rtx));
11252 emit_label (align_3_label);
11255 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11259 emit_insn (gen_adddi3 (out, out, const1_rtx));
11261 emit_insn (gen_addsi3 (out, out, const1_rtx));
11264 /* Generate loop to check 4 bytes at a time. It is not a good idea to
11265 align this loop. It gives only huge programs, but does not help to
11267 emit_label (align_4_label);
11269 mem = gen_rtx_MEM (SImode, out);
11270 emit_move_insn (scratch, mem);
11272 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11274 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11276 /* This formula yields a nonzero result iff one of the bytes is zero.
11277 This saves three branches inside loop and many cycles. */
/* Classic (x - 0x01010101) & ~x & 0x80808080 zero-byte detector.  */
11279 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11280 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11281 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11282 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11283 gen_int_mode (0x80808080, SImode)));
11284 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
/* Branch-free epilogue (cmov path): locate the zero byte within the
   word using conditional moves instead of jumps.  */
11289 rtx reg = gen_reg_rtx (SImode);
11290 rtx reg2 = gen_reg_rtx (Pmode);
11291 emit_move_insn (reg, tmpreg);
11292 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11294 /* If zero is not in the first two bytes, move two bytes forward. */
11295 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)))_
11296 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11297 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11298 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11299 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11302 /* Emit lea manually to avoid clobbering of flags. */
11303 emit_insn (gen_rtx_SET (SImode, reg2,
11304 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
11306 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11307 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11308 emit_insn (gen_rtx_SET (VOIDmode, out,
11309 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
/* Branching epilogue (no cmov): test, jump, shift, adjust.  */
11316 rtx end_2_label = gen_label_rtx ();
11317 /* Is zero in the first two bytes? */
11319 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11320 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11321 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11322 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11323 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11325 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11326 JUMP_LABEL (tmp) = end_2_label;
11328 /* Not in the first two. Move two bytes forward. */
11329 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11331 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
11333 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
11335 emit_label (end_2_label);
11339 /* Avoid branch in fixing the byte. */
/* tmpreg bit 7 (0x80 of the candidate byte) is shifted into the carry
   flag by the add; subtract-with-borrow then yields -3 or -4.  */
11340 tmpreg = gen_lowpart (QImode, tmpreg);
11341 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11343 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
11345 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
11347 emit_label (end_0_label);
/* Emit a call insn to FNADDR with argument-size note CALLARG1, returning
   into RETVAL if non-null; POP is the callee-pop byte count, SIBCALL
   non-zero for a tail call.  CALLARG2 is the SSE register count passed
   in AL for 64-bit varargs calls.
   NOTE(review): interior lines (braces, the non-Mach-O #if, returns)
   are elided in this extraction.  */
11351 ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop, sibcall)
11352 rtx retval, fnaddr, callarg1, callarg2, pop;
11355 rtx use = NULL, call;
11357 if (pop == const0_rtx)
11359 if (TARGET_64BIT && pop)
11363 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11364 fnaddr = machopic_indirect_call_target (fnaddr);
11366 /* Static functions and indirect calls don't need the pic register. */
11367 if (! TARGET_64BIT && flag_pic
11368 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11369 && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
11370 use_reg (&use, pic_offset_table_rtx)_
/* x86-64 varargs ABI: AL carries the number of SSE registers used.  */
11372 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11374 rtx al = gen_rtx_REG (QImode, 0);
11375 emit_move_insn (al, callarg2);
11376 use_reg (&use, al);
11378 #endif /* TARGET_MACHO */
11380 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11382 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11383 fnaddr = gen_rtx_MEM (QImode, fnaddr);
/* 64-bit sibcalls must go through a fixed register (hard reg 40, R11
   per the numbering used here) so the epilogue cannot clobber it.  */
11385 if (sibcall && TARGET_64BIT
11386 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11389 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11390 fnaddr = gen_rtx_REG (Pmode, 40);
11391 emit_move_insn (fnaddr, addr);
11392 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11395 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11397 call = gen_rtx_SET (VOIDmode, retval, call);
/* Callee-pop: wrap the call and the stack adjustment in a PARALLEL.  */
11400 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11401 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11402 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11405 call = emit_call_insn (call);
11407 CALL_INSN_FUNCTION_USAGE (call) = use;
11411 /* Clear stack slot assignments remembered from previous functions.
11412 This is called from INIT_EXPANDERS once before RTL is emitted for each
11415 static struct machine_function *
11416 ix86_init_machine_status ()
/* Allocates a zeroed, garbage-collected per-function machine_function.  */
11418 return ggc_alloc_cleared (sizeof (struct machine_function));
11421 /* Return a MEM corresponding to a stack slot with mode MODE.
11422 Allocate a new slot if necessary.
11424 The RTL for a function can have several slots available: N is
11425 which slot to use. */
11428 assign_386_stack_local (mode, n)
11429 enum machine_mode mode;
/* Bounds-check N, then lazily allocate and cache the slot per (mode, n)
   in ix86_stack_locals.  NOTE(review): the abort on out-of-range N and
   the second parameter declaration are elided here.  */
11432 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11435 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
11436 ix86_stack_locals[(int) mode][n]
11437 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11439 return ix86_stack_locals[(int) mode][n];
11442 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11444 static GTY(()) rtx ix86_tls_symbol;
11446 ix86_tls_get_addr ()
/* Lazily creates and caches the SYMBOL_REF; the triple-underscore
   ___tls_get_addr variant is the GNU TLS 32-bit entry point.  */
11449 if (!ix86_tls_symbol)
11451 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11452 (TARGET_GNU_TLS && !TARGET_64BIT)
11453 ? "___tls_get_addr"
11454 : "__tls_get_addr");
11457 return ix86_tls_symbol;
11460 /* Calculate the length of the memory address in the instruction
11461 encoding. Does not include the one-byte modrm, opcode, or prefix. */
/* Returns the number of extra bytes (SIB + displacement) the address
   ADDR contributes to the instruction encoding.
   NOTE(review): the abort for auto-modify addresses, the decompose
   failure path, and the len accumulation/returns are elided here.  */
11464 memory_address_length (addr)
11467 struct ix86_address parts;
11468 rtx base, index, disp;
/* Auto-inc/dec addressing never needs extra encoding bytes here.  */
11471 if (GET_CODE (addr) == PRE_DEC
11472 || GET_CODE (addr) == POST_INC
11473 || GET_CODE (addr) == PRE_MODIFY
11474 || GET_CODE (addr) == POST_MODIFY)
11477 if (! ix86_decompose_address (addr, &parts))
11481 index = parts.index;
11485 /* Register Indirect. */
11486 if (base && !index && !disp)
11488 /* Special cases: ebp and esp need the two-byte modrm form. */
11489 if (addr == stack_pointer_rtx
11490 || addr == arg_pointer_rtx
11491 || addr == frame_pointer_rtx
11492 || addr == hard_frame_pointer_rtx)
11496 /* Direct Addressing. */
11497 else if (disp && !base && !index)
11502 /* Find the length of the displacement constant. */
/* 'K' constraint = signed 8-bit, i.e. a one-byte displacement.  */
11505 if (GET_CODE (disp) == CONST_INT
11506 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
11512 /* An index requires the two-byte modrm form. */
11520 /* Compute default value for "length_immediate" attribute. When SHORTFORM
11521 is set, expect that insn have 8bit immediate alternative. */
/* Scans INSN's operands for a constant and returns its encoded size in
   bytes based on the insn's mode attribute; an in-range CONST_INT with a
   short-form alternative ('K' = signed 8-bit) encodes in one byte.
   NOTE(review): the per-mode return values inside the switch are elided
   in this extraction.  */
11523 ix86_attr_length_immediate_default (insn, shortform)
11529 extract_insn_cached (insn);
11530 for (i = recog_data.n_operands - 1; i >= 0; --i)
11531 if (CONSTANT_P (recog_data.operand[i]))
11536 && GET_CODE (recog_data.operand[i]) == CONST_INT
11537 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11541 switch (get_attr_mode (insn))
11552 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
11557 fatal_insn ("unknown insn mode", insn);
11563 /* Compute default value for "length_address" attribute. */
/* Returns the encoding length of the first MEM operand's address, or
   (per the elided fallthrough) a default when no MEM is present.  */
11565 ix86_attr_length_address_default (insn)
11569 extract_insn_cached (insn);
11570 for (i = recog_data.n_operands - 1; i >= 0; --i)
11571 if (GET_CODE (recog_data.operand[i]) == MEM)
11573 return memory_address_length (XEXP (recog_data.operand[i], 0));
11579 /* Return the maximum number of instructions a cpu can issue. */
/* NOTE(review): the function header, switch statement, and per-case
   return values are elided in this extraction; only the case labels
   for the multi-issue processors remain visible.  */
11586 case PROCESSOR_PENTIUM:
11590 case PROCESSOR_PENTIUMPRO:
11591 case PROCESSOR_PENTIUM4:
11592 case PROCESSOR_ATHLON:
11601 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
11602 by DEP_INSN and nothing set by DEP_INSN. */
11605 ix86_flags_dependant (insn, dep_insn, insn_type)
11606 rtx insn, dep_insn;
11607 enum attr_type insn_type;
11611 /* Simplify the test for uninteresting insns. */
11612 if (insn_type != TYPE_SETCC
11613 && insn_type != TYPE_ICMOV
11614 && insn_type != TYPE_FCMOV
11615 && insn_type != TYPE_IBR)
11618 if ((set = single_set (dep_insn)) != 0)
11620 set = SET_DEST (set);
11623 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
11624 && XVECLEN (PATTERN (dep_insn), 0) == 2
11625 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
11626 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
11628 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11629 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11634 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11637 /* This test is true if the dependent insn reads the flags but
11638 not any other potentially set register. */
11639 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11642 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11648 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11649 address with operands set by DEP_INSN. */
/* For an LEA, the "address" is the SET_SRC itself; for other insns, scan
   operands for a MEM and test its address.  Returns whether DEP_INSN
   modifies anything that address mentions.
   NOTE(review): braces, the no-MEM early return, and the LEA condition's
   second clause are elided in this extraction.  */
11652 ix86_agi_dependant (insn, dep_insn, insn_type)
11653 rtx insn, dep_insn;
11654 enum attr_type insn_type;
11658 if (insn_type == TYPE_LEA
11661 addr = PATTERN (insn);
11662 if (GET_CODE (addr) == SET)
11664 else if (GET_CODE (addr) == PARALLEL
11665 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11666 addr = XVECEXP (addr, 0, 0);
11669 addr = SET_SRC (addr);
11674 extract_insn_cached (insn);
11675 for (i = recog_data.n_operands - 1; i >= 0; --i)
11676 if (GET_CODE (recog_data.operand[i]) == MEM)
11678 addr = XEXP (recog_data.operand[i], 0);
11685 return modified_in_p (addr, dep_insn);
/* Scheduler hook: adjust the latency COST of the dependency LINK between
   DEP_INSN (producer) and INSN (consumer) for the target CPU.  Returns
   the adjusted cost.
   NOTE(review): braces, several `return cost` statements, the switch
   header, and some cost adjustments are elided in this extraction.  */
11689 ix86_adjust_cost (insn, link, dep_insn, cost)
11690 rtx insn, link, dep_insn;
11693 enum attr_type insn_type, dep_insn_type;
11694 enum attr_memory memory, dep_memory;
11696 int dep_insn_code_number;
11698 /* Anti and output dependencies have zero cost on all CPUs. */
11699 if (REG_NOTE_KIND (link) != 0)
11702 dep_insn_code_number = recog_memoized (dep_insn);
11704 /* If we can't recognize the insns, we can't really do anything. */
11705 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
11708 insn_type = get_attr_type (insn);
11709 dep_insn_type = get_attr_type (dep_insn);
11713 case PROCESSOR_PENTIUM:
11714 /* Address Generation Interlock adds a cycle of latency. */
11715 if (ix86_agi_dependant (insn, dep_insn, insn_type))
11718 /* ??? Compares pair with jump/setcc. */
11719 if (ix86_flags_dependant (insn, dep_insn, insn_type))
11722 /* Floating point stores require value to be ready one cycle earlier. */
11723 if (insn_type == TYPE_FMOV
11724 && get_attr_memory (insn) == MEMORY_STORE
11725 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11729 case PROCESSOR_PENTIUMPRO:
11730 memory = get_attr_memory (insn);
11731 dep_memory = get_attr_memory (dep_insn);
11733 /* Since we can't represent delayed latencies of load+operation,
11734 increase the cost here for non-imov insns. */
11735 if (dep_insn_type != TYPE_IMOV
11736 && dep_insn_type != TYPE_FMOV
11737 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
11740 /* INT->FP conversion is expensive. */
11741 if (get_attr_fp_int_src (dep_insn))
11744 /* There is one cycle extra latency between an FP op and a store. */
11745 if (insn_type == TYPE_FMOV
11746 && (set = single_set (dep_insn)) != NULL_RTX
11747 && (set2 = single_set (insn)) != NULL_RTX
11748 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11749 && GET_CODE (SET_DEST (set2)) == MEM)
11752 /* Show ability of reorder buffer to hide latency of load by executing
11753 in parallel with previous instruction in case
11754 previous instruction is not needed to compute the address. */
11755 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11756 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11758 /* Claim moves to take one cycle, as core can issue one load
11759 at time and the next load can start cycle later. */
11760 if (dep_insn_type == TYPE_IMOV
11761 || dep_insn_type == TYPE_FMOV)
/* K6 case (label elided): same load-hiding logic as above.  */
11769 memory = get_attr_memory (insn);
11770 dep_memory = get_attr_memory (dep_insn);
11771 /* The esp dependency is resolved before the instruction is really
11773 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11774 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11777 /* Since we can't represent delayed latencies of load+operation,
11778 increase the cost here for non-imov insns. */
11779 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11780 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
11782 /* INT->FP conversion is expensive. */
11783 if (get_attr_fp_int_src (dep_insn))
11786 /* Show ability of reorder buffer to hide latency of load by executing
11787 in parallel with previous instruction in case
11788 previous instruction is not needed to compute the address. */
11789 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11790 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11792 /* Claim moves to take one cycle, as core can issue one load
11793 at time and the next load can start cycle later. */
11794 if (dep_insn_type == TYPE_IMOV
11795 || dep_insn_type == TYPE_FMOV)
11804 case PROCESSOR_ATHLON:
11806 memory = get_attr_memory (insn);
11807 dep_memory = get_attr_memory (dep_insn);
11809 /* Show ability of reorder buffer to hide latency of load by executing
11810 in parallel with previous instruction in case
11811 previous instruction is not needed to compute the address. */
11812 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11813 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11815 /* Claim moves to take one cycle, as core can issue one load
11816 at time and the next load can start cycle later. */
11817 if (dep_insn_type == TYPE_IMOV
11818 || dep_insn_type == TYPE_FMOV)
11820 else if (cost >= 3)
/* Per-cycle PentiumPro scheduling state; the decode[] member (elided
   here) tracks the insns assigned to the three decoders.  */
11835 struct ppro_sched_data
11838 int issued_this_cycle;
/* Returns INSN's ppro_uops attribute, defaulting to PPRO_UOPS_MANY for
   unrecognizable insns (the conservative choice for decoder slotting).  */
11842 static enum attr_ppro_uops
11843 ix86_safe_ppro_uops (insn)
11846 if (recog_memoized (insn) >= 0)
11847 return get_attr_ppro_uops (insn);
11849 return PPRO_UOPS_MANY;
/* Debug helper: prints the UIDs of the insns currently occupying the
   three PPro decoder slots to the scheduling DUMP file.  */
11853 ix86_dump_ppro_packet (dump)
11856 if (ix86_sched_data.ppro.decode[0])
11858 fprintf (dump, "PPRO packet: %d",
11859 INSN_UID (ix86_sched_data.ppro.decode[0]));
11860 if (ix86_sched_data.ppro.decode[1])
11861 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
11862 if (ix86_sched_data.ppro.decode[2])
11863 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
11864 fputc ('\n', dump);
11868 /* We're beginning a new block. Initialize data structures as necessary. */
/* Scheduler hook: resets the global per-block scheduling state.  */
11871 ix86_sched_init (dump, sched_verbose, veclen)
11872 FILE *dump ATTRIBUTE_UNUSED;
11873 int sched_verbose ATTRIBUTE_UNUSED;
11874 int veclen ATTRIBUTE_UNUSED;
11876 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
11879 /* Shift INSN to SLOT, and shift everything else down. */
/* Moves *INSNP to *SLOT in the ready array by sliding the intervening
   entries down one position.  NOTE(review): the insn save/restore lines
   around this loop are elided in the extraction.  */
11882 ix86_reorder_insn (insnp, slot)
11889 insnp[0] = insnp[1];
11890 while (++insnp != slot);
/* PPro-specific ready-queue reordering: fill the three decoder slots
   (one complex "many-uop" or "few-uop" insn in slot 0, single-uop insns
   in slots 1-2), promoting suitable insns to the queue head.  READY is
   the start, E_READY the last element, of the ready array.
   NOTE(review): braces, the decode[] declaration, and some loop
   scaffolding are elided in this extraction.  */
11896 ix86_sched_reorder_ppro (ready, e_ready)
11901 enum attr_ppro_uops cur_uops;
11902 int issued_this_cycle;
11906 /* At this point .ppro.decode contains the state of the three
11907 decoders from last "cycle". That is, those insns that were
11908 actually independent. But here we're scheduling for the
11909 decoder, and we may find things that are decodable in the
11912 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
11913 issued_this_cycle = 0;
11916 cur_uops = ix86_safe_ppro_uops (*insnp);
11918 /* If the decoders are empty, and we've a complex insn at the
11919 head of the priority queue, let it issue without complaint. */
11920 if (decode[0] == NULL)
11922 if (cur_uops == PPRO_UOPS_MANY)
11924 decode[0] = *insnp;
11928 /* Otherwise, search for a 2-4 uop insn to issue. */
11929 while (cur_uops != PPRO_UOPS_FEW)
11931 if (insnp == ready)
11933 cur_uops = ix86_safe_ppro_uops (*--insnp);
11936 /* If so, move it to the head of the line. */
11937 if (cur_uops == PPRO_UOPS_FEW)
11938 ix86_reorder_insn (insnp, e_ready);
11940 /* Issue the head of the queue. */
11941 issued_this_cycle = 1;
11942 decode[0] = *e_ready--;
11945 /* Look for simple insns to fill in the other two slots. */
11946 for (i = 1; i < 3; ++i)
11947 if (decode[i] == NULL)
11949 if (ready > e_ready)
11953 cur_uops = ix86_safe_ppro_uops (*insnp);
11954 while (cur_uops != PPRO_UOPS_ONE)
11956 if (insnp == ready)
11958 cur_uops = ix86_safe_ppro_uops (*--insnp);
11961 /* Found one. Move it to the head of the queue and issue it. */
11962 if (cur_uops == PPRO_UOPS_ONE)
11964 ix86_reorder_insn (insnp, e_ready);
11965 decode[i] = *e_ready--;
11966 issued_this_cycle++;
11970 /* ??? Didn't find one. Ideally, here we would do a lazy split
11971 of 2-uop insns, issue one and queue the other. */
11975 if (issued_this_cycle == 0)
11976 issued_this_cycle = 1;
11977 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
/* TARGET_SCHED_REORDER hook: dispatch to the CPU-specific reorder
   routine (only PentiumPro has one here) and return the issue rate.
   NOTE(review): the switch statement itself and the default case that
   sets issued_this_cycle = 1 for non-PPro CPUs are partially elided in
   this listing; only fragments are visible.  */
11980 /* We are about to being issuing insns for this clock cycle.
11981    Override the default sort algorithm to better slot instructions.  */
11983 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
11984      FILE *dump ATTRIBUTE_UNUSED;
11985      int sched_verbose ATTRIBUTE_UNUSED;
11988      int clock_var ATTRIBUTE_UNUSED;
11990   int n_ready = *n_readyp;
11991   rtx *e_ready = ready + n_ready - 1;
11993   /* Make sure to go ahead and initialize key items in
11994      ix86_sched_data if we are not going to bother trying to
11995      reorder the ready queue.  */
11998       ix86_sched_data.ppro.issued_this_cycle = 1;
12007     case PROCESSOR_PENTIUMPRO:
12008       ix86_sched_reorder_ppro (ready, e_ready);
12013   return ix86_issue_rate ();
/* TARGET_SCHED_VARIABLE_ISSUE hook.  For most CPUs simply decrement
   CAN_ISSUE_MORE.  For PentiumPro, maintain the simulated decode
   packet: a MANY-uop insn occupies the whole packet by itself; a FEW
   (2-4 uop) insn goes into slot 0; a 1-uop insn fills the first free
   slot, and when a packet completes it is dumped and cleared.
   Returns the remaining issue count for this cycle.
   NOTE(review): braces and a few connective lines (e.g. the dump-file
   null checks guarding ix86_dump_ppro_packet calls) are elided from
   this listing; kept byte-identical.  */
12016 /* We are about to issue INSN.  Return the number of insns left on the
12017    ready queue that can be issued this cycle.  */
12020 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
12024      int can_issue_more;
12030       return can_issue_more - 1;
12032     case PROCESSOR_PENTIUMPRO:
12034 	enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
12036 	if (uops == PPRO_UOPS_MANY)
12039 	      ix86_dump_ppro_packet (dump);
12040 	    ix86_sched_data.ppro.decode[0] = insn;
12041 	    ix86_sched_data.ppro.decode[1] = NULL;
12042 	    ix86_sched_data.ppro.decode[2] = NULL;
12044 	      ix86_dump_ppro_packet (dump);
12045 	    ix86_sched_data.ppro.decode[0] = NULL;
12047 	else if (uops == PPRO_UOPS_FEW)
12050 	      ix86_dump_ppro_packet (dump);
12051 	    ix86_sched_data.ppro.decode[0] = insn;
12052 	    ix86_sched_data.ppro.decode[1] = NULL;
12053 	    ix86_sched_data.ppro.decode[2] = NULL;
12057 	    for (i = 0; i < 3; ++i)
12058 	      if (ix86_sched_data.ppro.decode[i] == NULL)
12060 		  ix86_sched_data.ppro.decode[i] = insn;
12068 		    ix86_dump_ppro_packet (dump);
12069 		  ix86_sched_data.ppro.decode[0] = NULL;
12070 		  ix86_sched_data.ppro.decode[1] = NULL;
12071 		  ix86_sched_data.ppro.decode[2] = NULL;
12075       return --ix86_sched_data.ppro.issued_this_cycle;
/* TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE hook: the DFA scheduler
   description exists only for Pentium and Athlon/K8.
   NOTE(review): the return statements are elided from this listing;
   presumably returns 1 for those CPUs and 0 otherwise — confirm.  */
12080 ia32_use_dfa_pipeline_interface ()
12082   if (TARGET_PENTIUM || TARGET_ATHLON_K8)
/* TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD hook.
   NOTE(review): the return statements are elided; only the Pentium
   test is visible (Pentium presumably gets a nonzero lookahead).  */
12087 /* How many alternative schedules to try.  This should be as wide as the
12088    scheduling freedom in the DFA, but no wider.  Making this value too
12089    large results extra work for the scheduler.  */
12092 ia32_multipass_dfa_lookahead ()
12094   if (ix86_cpu == PROCESSOR_PENTIUM)
/* Walk the insn list INSNS and, via ix86_set_move_mem_attrs_1, copy
   the memory attributes of DSTREF/SRCREF onto MEMs whose address is
   DSTREG/SRCREG.  Used after emitting a block-move sequence so the
   new MEMs carry correct aliasing info.  */
12101 /* Walk through INSNS and look for MEM references whose address is DSTREG or
12102    SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
12106 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
12108      rtx dstref, srcref, dstreg, srcreg;
12112   for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
12114       ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
/* Recursive worker for ix86_set_move_mem_attrs: walk the rtx X using
   its format string, and on each (mem DSTREG) / (mem SRCREG) copy the
   attributes from DSTREF / SRCREF respectively.  Recurses into 'e'
   (expression) operands and each element of 'E' (vector) operands.  */
12118 /* Subroutine of above to actually do the updating by recursively walking
12122 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
12124      rtx dstref, srcref, dstreg, srcreg;
12126   enum rtx_code code = GET_CODE (x);
12127   const char *format_ptr = GET_RTX_FORMAT (code);
12130   if (code == MEM && XEXP (x, 0) == dstreg)
12131     MEM_COPY_ATTRIBUTES (x, dstref);
12132   else if (code == MEM && XEXP (x, 0) == srcreg)
12133     MEM_COPY_ATTRIBUTES (x, srcref);
12135   for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
12137       if (*format_ptr == 'e')
12138 	ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
12140       else if (*format_ptr == 'E')
12141 	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12142 	  ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
/* CONSTANT_ALIGNMENT implementation: bump the alignment of constants
   placed in memory.  DFmode reals get at least 64-bit alignment;
   128-bit-aligned modes get 128; long string constants (>= 31 bytes)
   also get bumped (the returned value for that branch is elided from
   this listing).  */
12147 /* Compute the alignment given to a constant that is being placed in memory.
12148    EXP is the constant and ALIGN is the alignment that the object would
12150    The value of this function is used instead of that alignment to align
12154 ix86_constant_alignment (exp, align)
12158   if (TREE_CODE (exp) == REAL_CST)
12160       if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12162       else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12165   else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
/* DATA_ALIGNMENT implementation: raise the alignment of static data.
   Large aggregates (>= 256 bits) get 256-bit alignment; on x86-64,
   aggregates >= 128 bits get at least 128.  Arrays/complex/records
   containing DFmode get 64; 128-bit modes get 128.  The actual
   "return" lines for each branch are elided from this listing —
   only the conditions are visible.  */
12172 /* Compute the alignment for a static variable.
12173    TYPE is the data type, and ALIGN is the alignment that
12174    the object would ordinarily have.  The value of this function is used
12175    instead of that alignment to align the object.  */
12178 ix86_data_alignment (type, align)
12182   if (AGGREGATE_TYPE_P (type)
12183        && TYPE_SIZE (type)
12184        && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12185        && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12186 	   || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12189   /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12190      to 16byte boundary.  */
12193       if (AGGREGATE_TYPE_P (type)
12194 	   && TYPE_SIZE (type)
12195 	   && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12196 	   && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12197 	       || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12201   if (TREE_CODE (type) == ARRAY_TYPE)
12203       if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12205       if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12208   else if (TREE_CODE (type) == COMPLEX_TYPE)
12211       if (TYPE_MODE (type) == DCmode && align < 64)
12213       if (TYPE_MODE (type) == XCmode && align < 128)
12216   else if ((TREE_CODE (type) == RECORD_TYPE
12217 	    || TREE_CODE (type) == UNION_TYPE
12218 	    || TREE_CODE (type) == QUAL_UNION_TYPE)
12219 	   && TYPE_FIELDS (type))
12221       if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12223       if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12226   else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12227 	   || TREE_CODE (type) == INTEGER_TYPE)
12229       if (TYPE_MODE (type) == DFmode && align < 64)
12231       if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
/* LOCAL_ALIGNMENT implementation: raise the alignment of stack-local
   variables.  Same mode-based rules as ix86_data_alignment, but the
   x86-64 aggregate threshold here is 16 *bits* low word (>= 16) with
   128-bit alignment — the return lines for each branch are elided
   from this listing.  */
12238 /* Compute the alignment for a local variable.
12239    TYPE is the data type, and ALIGN is the alignment that
12240    the object would ordinarily have.  The value of this macro is used
12241    instead of that alignment to align the object.  */
12244 ix86_local_alignment (type, align)
12248   /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12249      to 16byte boundary.  */
12252       if (AGGREGATE_TYPE_P (type)
12253 	   && TYPE_SIZE (type)
12254 	   && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12255 	   && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12256 	       || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12259   if (TREE_CODE (type) == ARRAY_TYPE)
12261       if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12263       if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12266   else if (TREE_CODE (type) == COMPLEX_TYPE)
12268       if (TYPE_MODE (type) == DCmode && align < 64)
12270       if (TYPE_MODE (type) == XCmode && align < 128)
12273   else if ((TREE_CODE (type) == RECORD_TYPE
12274 	    || TREE_CODE (type) == UNION_TYPE
12275 	    || TREE_CODE (type) == QUAL_UNION_TYPE)
12276 	   && TYPE_FIELDS (type))
12278       if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12280       if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12283   else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12284 	   || TREE_CODE (type) == INTEGER_TYPE)
12287       if (TYPE_MODE (type) == DFmode && align < 64)
12289       if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
/* INITIALIZE_TRAMPOLINE implementation.  Writes machine code into the
   trampoline at TRAMP.
   32-bit layout: 0xb9 (mov $cxt,%ecx) + imm32, then 0xe9
   (jmp rel32) + displacement to FNADDR.
   64-bit layout: 0x41 0xbb (movl $fnaddr,%r11d) when FNADDR fits in a
   zero-extended 32-bit value, else 0x49 0xbb (movabs to %r11); then
   0x49 0xba (movabs $cxt,%r10); then 0x49 0xff 0xe3 (jmp *%r11).
   Aborts if the emitted bytes exceed TRAMPOLINE_SIZE.
   NOTE(review): several lines (the TARGET_64BIT branch header, offset
   updates, and the abort call) are elided from this listing.  */
12295 /* Emit RTL insns to initialize the variable parts of a trampoline.
12296    FNADDR is an RTX for the address of the function's pure code.
12297    CXT is an RTX for the static chain value for the function.  */
12299 x86_initialize_trampoline (tramp, fnaddr, cxt)
12300      rtx tramp, fnaddr, cxt;
12304       /* Compute offset from the end of the jmp to the target function.  */
12305       rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12306 			       plus_constant (tramp, 10),
12307 			       NULL_RTX, 1, OPTAB_DIRECT);
12308       emit_move_insn (gen_rtx_MEM (QImode, tramp),
12309 		      gen_int_mode (0xb9, QImode));
12310       emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12311       emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12312 		      gen_int_mode (0xe9, QImode));
12313       emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12318       /* Try to load address using shorter movl instead of movabs.
12319          We may want to support movq for kernel mode, but kernel does not use
12320          trampolines at the moment.  */
12321       if (x86_64_zero_extended_value (fnaddr))
12323 	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
12324 	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12325 			  gen_int_mode (0xbb41, HImode));
12326 	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12327 			  gen_lowpart (SImode, fnaddr));
12332 	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12333 			  gen_int_mode (0xbb49, HImode));
12334 	  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12338       /* Load static chain using movabs to r10.  */
12339       emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12340 		      gen_int_mode (0xba49, HImode));
12341       emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12344       /* Jump to the r11 */
12345       emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12346 		      gen_int_mode (0xff49, HImode));
12347       emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12348 		      gen_int_mode (0xe3, QImode));
12350       if (offset > TRAMPOLINE_SIZE)
12354 #ifdef TRANSFER_FROM_TRAMPOLINE
12355   emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
12356 		     LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
/* def_builtin: register builtin NAME with type TYPE and code CODE,
   but only when one of the ISA bits in MASK is enabled in
   target_flags.  (The do/while(0) wrapper lines are elided from this
   listing.)  */
12360 #define def_builtin(MASK, NAME, TYPE, CODE)			\
12362     if ((MASK) & target_flags)					\
12363       builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,	\
12364 			NULL, NULL_TREE);			\
/* Table-entry descriptor shared by the bdesc_* arrays below:
   mask       - target_flags bits required to enable the builtin,
   icode      - insn pattern used to expand it,
   name       - builtin name, or 0 for builtins expanded specially,
   code       - IX86_BUILTIN_* enumerator,
   comparison - rtx comparison code for compare-style builtins,
   flag       - extra per-entry flag (e.g. swap operands).  */
12367 struct builtin_description
12369   const unsigned int mask;
12370   const enum insn_code icode;
12371   const char *const name;
12372   const enum ix86_builtins code;
12373   const enum rtx_code comparison;
12374   const unsigned int flag;
12377 /* Used for builtins that are enabled both by -msse and -msse2.  */
12378 #define MASK_SSE1 (MASK_SSE | MASK_SSE2)
/* COMISS/UCOMISS (SSE) and COMISD/UCOMISD (SSE2) scalar-compare
   builtins.  The comparison field uses unordered codes (UNEQ, UNLT,
   UNLE, LTGT) for the eq/lt/le/neq variants and ordered GT/GE for the
   gt/ge variants.  (Closing "};" of the array is elided from this
   listing.)  */
12380 static const struct builtin_description bdesc_comi[] =
12382   { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12383   { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12384   { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12385   { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12386   { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12387   { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12388   { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12389   { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12390   { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12391   { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12392   { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12393   { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12394   { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12395   { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12396   { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12397   { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12398   { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12399   { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12400   { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12401   { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12402   { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12403   { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12404   { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12405   { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
/* Two-operand MMX/3DNow!/SSE/SSE2 builtins, expanded via
   ix86_expand_binop_builtin.  Entries with name == 0 are expanded
   specially by ix86_expand_builtin.  For compare entries, flag == 1
   means the operands are swapped (cmpgt implemented as swapped cmplt,
   etc.).  (Closing "};" of the array is elided from this listing.)  */
12408 static const struct builtin_description bdesc_2arg[] =
12411   { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12412   { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12413   { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12414   { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12415   { MASK_SSE1, CODE_FOR_vmaddv4sf3,  "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12416   { MASK_SSE1, CODE_FOR_vmsubv4sf3,  "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12417   { MASK_SSE1, CODE_FOR_vmmulv4sf3,  "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12418   { MASK_SSE1, CODE_FOR_vmdivv4sf3,  "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12420   { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12421   { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12422   { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12423   { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12424   { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12425   { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12426   { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12427   { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12428   { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12429   { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12430   { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12431   { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12432   { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12433   { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12434   { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12435   { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12436   { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12437   { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12438   { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12439   { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12441   { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12442   { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12443   { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12444   { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12446   { MASK_SSE1, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12447   { MASK_SSE1, CODE_FOR_sse_nandv4sf3,  "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12448   { MASK_SSE1, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12449   { MASK_SSE1, CODE_FOR_sse_xorv4sf3,  "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12451   { MASK_SSE1, CODE_FOR_sse_movss,  "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12452   { MASK_SSE1, CODE_FOR_sse_movhlps,  "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12453   { MASK_SSE1, CODE_FOR_sse_movlhps,  "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12454   { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12455   { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12458   { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12459   { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12460   { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12461   { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12462   { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12463   { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12465   { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12466   { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12467   { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12468   { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12469   { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12470   { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12471   { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12472   { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12474   { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12475   { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12476   { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12478   { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12479   { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12480   { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12481   { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12483   { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12484   { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12486   { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12487   { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12488   { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12489   { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12490   { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12491   { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12493   { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12494   { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12495   { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12496   { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12498   { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12499   { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12500   { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12501   { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12502   { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12503   { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12506   { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12507   { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12508   { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12510   { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12511   { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12513   { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12514   { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12515   { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12516   { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12517   { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12518   { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12520   { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12521   { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12522   { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12523   { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12524   { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12525   { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12527   { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12528   { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12529   { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12530   { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12532   { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12533   { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12536   { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12537   { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12538   { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12539   { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12540   { MASK_SSE2, CODE_FOR_vmaddv2df3,  "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12541   { MASK_SSE2, CODE_FOR_vmsubv2df3,  "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12542   { MASK_SSE2, CODE_FOR_vmmulv2df3,  "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12543   { MASK_SSE2, CODE_FOR_vmdivv2df3,  "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12545   { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12546   { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12547   { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12548   { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12549   { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12550   { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12551   { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12552   { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12553   { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12554   { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12555   { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12556   { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12557   { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12558   { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12559   { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12560   { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12561   { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12562   { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12563   { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12564   { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12566   { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12567   { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12568   { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12569   { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12571   { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12572   { MASK_SSE2, CODE_FOR_sse2_nandv2df3,  "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12573   { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12574   { MASK_SSE2, CODE_FOR_sse2_xorv2df3,  "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12576   { MASK_SSE2, CODE_FOR_sse2_movsd,  "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12577   { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12578   { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12581   { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12582   { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12583   { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
/* NOTE(review): paddq128/psubq128 below use the V4SImode patterns
   (addv4si3/subv4si3); the V2DImode patterns (addv2di3/subv2di3)
   look intended for these 64-bit-element builtins — confirm against
   gcc history before changing.  */
12584   { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12585   { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12586   { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12587   { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12588   { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
/* NOTE(review): the eight 128-bit saturating add/sub builtins below
   are gated on MASK_MMX even though they are SSE2 instructions;
   MASK_SSE2 looks intended — confirm.  */
12590   { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12591   { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12592   { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12593   { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12594   { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12595   { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12596   { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12597   { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12599   { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12600   { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12601   { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
12602   { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
12604   { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12605   { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12606   { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12607   { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
12609   { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12610   { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12612   { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12613   { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12614   { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12615   { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12616   { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12617   { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12619   { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12620   { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12621   { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12622   { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12624   { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12625   { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12626   { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
12627   { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
12628   { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12629   { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12630   { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12631   { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
12633   { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12634   { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12635   { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12637   { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12638   { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12640   { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12641   { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12642   { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12643   { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12644   { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12645   { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12647   { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12648   { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12649   { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12650   { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12651   { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12652   { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12654   { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12655   { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12656   { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12657   { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12659   { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12661   { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12662   { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12663   { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
/* Builtins taking exactly one vector argument.  Entry layout matches
   struct builtin_description: { feature mask, insn code, builtin name
   (0 = the name is registered individually in
   ix86_init_mmx_sse_builtins), builtin enum, comparison code (unused
   for one-operand insns), flag }.
   NOTE(review): the extraction dropped interleaved lines here (the
   leading numeric token on each line is the original file's line
   number, and the closing "};" is missing); code kept verbatim.  */
12666 static const struct builtin_description bdesc_1arg[] =
/* MMX insns shared with the SSE1 / 3DNow!-Athlon extensions.  */
12668 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12669 { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12671 { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12672 { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12673 { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
/* SSE1 conversions.  */
12675 { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12676 { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12677 { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12678 { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
/* SSE2 moves and conversions.  */
12680 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12681 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12682 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
12683 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
12685 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12687 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12688 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
12690 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12691 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12692 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12693 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12694 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
12696 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
12698 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12699 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12701 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12702 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
12703 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
12705 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 }
/* Target hook: register all ix86-specific builtin functions.  The
   only work visible here is delegating to the MMX/SSE registration
   routine.
   NOTE(review): the return-type line, braces, and the guard that
   presumably tests TARGET_MMX before this call were dropped by the
   extraction — confirm against the full source.  */
12709 ix86_init_builtins ()
12712   ix86_init_mmx_sse_builtins ();
12715 /* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
12716    is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
   part of the builtins.  */
/* Register every MMX/SSE/3DNow!/SSE2 builtin.  First builds the tree
   FUNCTION_TYPE nodes for each builtin signature, then registers the
   simple two-operand builtins table-driven from bdesc_2arg (choosing
   the signature from the insn's operand mode), then the comi/ucomi
   comparisons from bdesc_comi, and finally every remaining builtin
   individually via def_builtin.
   NOTE(review): the extraction dropped many interleaved lines in this
   function (opening brace, `switch' case labels, `else' lines, and
   several `tree ...' declarator lines — e.g. the ones that must
   precede the bare `= build_function_type...' lines below).  The
   leading numeric token on each line is the original file's line
   number.  Code is preserved verbatim; do not hand-edit without the
   complete source.  */
12719 ix86_init_mmx_sse_builtins ()
12721   const struct builtin_description * d;
/* Pointer types used by the load/store builtin signatures.  */
12724   tree pchar_type_node = build_pointer_type (char_type_node);
12725   tree pcchar_type_node = build_pointer_type (
12726 build_type_variant (char_type_node, 1, 0));
12727   tree pfloat_type_node = build_pointer_type (float_type_node);
12728   tree pcfloat_type_node = build_pointer_type (
12729 build_type_variant (float_type_node, 1, 0));
12730   tree pv2si_type_node = build_pointer_type (V2SI_type_node);
12731   tree pv2di_type_node = build_pointer_type (V2DI_type_node);
12732   tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
/* Comparisons.  */
12735   tree int_ftype_v4sf_v4sf
12736     = build_function_type_list (integer_type_node,
12737 V4SF_type_node, V4SF_type_node, NULL_TREE);
12738   tree v4si_ftype_v4sf_v4sf
12739     = build_function_type_list (V4SI_type_node,
12740 V4SF_type_node, V4SF_type_node, NULL_TREE);
12741   /* MMX/SSE/integer conversions.  */
12742   tree int_ftype_v4sf
12743     = build_function_type_list (integer_type_node,
12744 V4SF_type_node, NULL_TREE);
12745   tree int_ftype_v8qi
12746     = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
12747   tree v4sf_ftype_v4sf_int
12748     = build_function_type_list (V4SF_type_node,
12749 V4SF_type_node, integer_type_node, NULL_TREE);
12750   tree v4sf_ftype_v4sf_v2si
12751     = build_function_type_list (V4SF_type_node,
12752 V4SF_type_node, V2SI_type_node, NULL_TREE);
12753   tree int_ftype_v4hi_int
12754     = build_function_type_list (integer_type_node,
12755 V4HI_type_node, integer_type_node, NULL_TREE);
12756   tree v4hi_ftype_v4hi_int_int
12757     = build_function_type_list (V4HI_type_node, V4HI_type_node,
12758 integer_type_node, integer_type_node,
12760   /* Miscellaneous.  */
12761   tree v8qi_ftype_v4hi_v4hi
12762     = build_function_type_list (V8QI_type_node,
12763 V4HI_type_node, V4HI_type_node, NULL_TREE);
12764   tree v4hi_ftype_v2si_v2si
12765     = build_function_type_list (V4HI_type_node,
12766 V2SI_type_node, V2SI_type_node, NULL_TREE);
12767   tree v4sf_ftype_v4sf_v4sf_int
12768     = build_function_type_list (V4SF_type_node,
12769 V4SF_type_node, V4SF_type_node,
12770 integer_type_node, NULL_TREE);
12771   tree v2si_ftype_v4hi_v4hi
12772     = build_function_type_list (V2SI_type_node,
12773 V4HI_type_node, V4HI_type_node, NULL_TREE);
12774   tree v4hi_ftype_v4hi_int
12775     = build_function_type_list (V4HI_type_node,
12776 V4HI_type_node, integer_type_node, NULL_TREE);
12777   tree v4hi_ftype_v4hi_di
12778     = build_function_type_list (V4HI_type_node,
12779 V4HI_type_node, long_long_unsigned_type_node,
12781   tree v2si_ftype_v2si_di
12782     = build_function_type_list (V2SI_type_node,
12783 V2SI_type_node, long_long_unsigned_type_node,
12785   tree void_ftype_void
12786     = build_function_type (void_type_node, void_list_node);
12787   tree void_ftype_unsigned
12788     = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
12789   tree unsigned_ftype_void
12790     = build_function_type (unsigned_type_node, void_list_node);
12792     = build_function_type (long_long_unsigned_type_node, void_list_node);
12793   tree v4sf_ftype_void
12794     = build_function_type (V4SF_type_node, void_list_node);
12795   tree v2si_ftype_v4sf
12796     = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
12797   /* Loads/stores.  */
12798   tree void_ftype_v8qi_v8qi_pchar
12799     = build_function_type_list (void_type_node,
12800 V8QI_type_node, V8QI_type_node,
12801 pchar_type_node, NULL_TREE);
12802   tree v4sf_ftype_pcfloat
12803     = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
12804   /* @@@ the type is bogus */
12805   tree v4sf_ftype_v4sf_pv2si
12806     = build_function_type_list (V4SF_type_node,
12807 V4SF_type_node, pv2si_type_node, NULL_TREE);
12808   tree void_ftype_pv2si_v4sf
12809     = build_function_type_list (void_type_node,
12810 pv2si_type_node, V4SF_type_node, NULL_TREE);
12811   tree void_ftype_pfloat_v4sf
12812     = build_function_type_list (void_type_node,
12813 pfloat_type_node, V4SF_type_node, NULL_TREE);
12814   tree void_ftype_pdi_di
12815     = build_function_type_list (void_type_node,
12816 pdi_type_node, long_long_unsigned_type_node,
12818   tree void_ftype_pv2di_v2di
12819     = build_function_type_list (void_type_node,
12820 pv2di_type_node, V2DI_type_node, NULL_TREE);
12821   /* Normal vector unops.  */
12822   tree v4sf_ftype_v4sf
12823     = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
12825   /* Normal vector binops.  */
12826   tree v4sf_ftype_v4sf_v4sf
12827     = build_function_type_list (V4SF_type_node,
12828 V4SF_type_node, V4SF_type_node, NULL_TREE);
12829   tree v8qi_ftype_v8qi_v8qi
12830     = build_function_type_list (V8QI_type_node,
12831 V8QI_type_node, V8QI_type_node, NULL_TREE);
12832   tree v4hi_ftype_v4hi_v4hi
12833     = build_function_type_list (V4HI_type_node,
12834 V4HI_type_node, V4HI_type_node, NULL_TREE);
12835   tree v2si_ftype_v2si_v2si
12836     = build_function_type_list (V2SI_type_node,
12837 V2SI_type_node, V2SI_type_node, NULL_TREE);
12838   tree di_ftype_di_di
12839     = build_function_type_list (long_long_unsigned_type_node,
12840 long_long_unsigned_type_node,
12841 long_long_unsigned_type_node, NULL_TREE);
12843   tree v2si_ftype_v2sf
12844     = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
12845   tree v2sf_ftype_v2si
12846     = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
12847   tree v2si_ftype_v2si
12848     = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
12849   tree v2sf_ftype_v2sf
12850     = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
12851   tree v2sf_ftype_v2sf_v2sf
12852     = build_function_type_list (V2SF_type_node,
12853 V2SF_type_node, V2SF_type_node, NULL_TREE);
12854   tree v2si_ftype_v2sf_v2sf
12855     = build_function_type_list (V2SI_type_node,
12856 V2SF_type_node, V2SF_type_node, NULL_TREE);
12857   tree pint_type_node = build_pointer_type (integer_type_node);
12858   tree pcint_type_node = build_pointer_type (
12859 build_type_variant (integer_type_node, 1, 0));
12860   tree pdouble_type_node = build_pointer_type (double_type_node);
12861   tree pcdouble_type_node = build_pointer_type (
12862 build_type_variant (double_type_node, 1, 0));
12863   tree int_ftype_v2df_v2df
12864     = build_function_type_list (integer_type_node,
12865 V2DF_type_node, V2DF_type_node, NULL_TREE);
12868     = build_function_type (intTI_type_node, void_list_node);
12869   tree v2di_ftype_void
12870     = build_function_type (V2DI_type_node, void_list_node);
12871   tree ti_ftype_ti_ti
12872     = build_function_type_list (intTI_type_node,
12873 intTI_type_node, intTI_type_node, NULL_TREE);
12874   tree void_ftype_pcvoid
12875     = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
12877     = build_function_type_list (V2DI_type_node,
12878 long_long_unsigned_type_node, NULL_TREE);
12880     = build_function_type_list (long_long_unsigned_type_node,
12881 V2DI_type_node, NULL_TREE);
12882   tree v4sf_ftype_v4si
12883     = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
12884   tree v4si_ftype_v4sf
12885     = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
12886   tree v2df_ftype_v4si
12887     = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
12888   tree v4si_ftype_v2df
12889     = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
12890   tree v2si_ftype_v2df
12891     = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
12892   tree v4sf_ftype_v2df
12893     = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
12894   tree v2df_ftype_v2si
12895     = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
12896   tree v2df_ftype_v4sf
12897     = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
12898   tree int_ftype_v2df
12899     = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
12900   tree v2df_ftype_v2df_int
12901     = build_function_type_list (V2DF_type_node,
12902 V2DF_type_node, integer_type_node, NULL_TREE);
12903   tree v4sf_ftype_v4sf_v2df
12904     = build_function_type_list (V4SF_type_node,
12905 V4SF_type_node, V2DF_type_node, NULL_TREE);
12906   tree v2df_ftype_v2df_v4sf
12907     = build_function_type_list (V2DF_type_node,
12908 V2DF_type_node, V4SF_type_node, NULL_TREE);
12909   tree v2df_ftype_v2df_v2df_int
12910     = build_function_type_list (V2DF_type_node,
12911 V2DF_type_node, V2DF_type_node,
12914   tree v2df_ftype_v2df_pv2si
12915     = build_function_type_list (V2DF_type_node,
12916 V2DF_type_node, pv2si_type_node, NULL_TREE);
12917   tree void_ftype_pv2si_v2df
12918     = build_function_type_list (void_type_node,
12919 pv2si_type_node, V2DF_type_node, NULL_TREE);
12920   tree void_ftype_pdouble_v2df
12921     = build_function_type_list (void_type_node,
12922 pdouble_type_node, V2DF_type_node, NULL_TREE);
12923   tree void_ftype_pint_int
12924     = build_function_type_list (void_type_node,
12925 pint_type_node, integer_type_node, NULL_TREE);
12926   tree void_ftype_v16qi_v16qi_pchar
12927     = build_function_type_list (void_type_node,
12928 V16QI_type_node, V16QI_type_node,
12929 pchar_type_node, NULL_TREE);
12930   tree v2df_ftype_pcdouble
12931     = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
12932   tree v2df_ftype_v2df_v2df
12933     = build_function_type_list (V2DF_type_node,
12934 V2DF_type_node, V2DF_type_node, NULL_TREE);
12935   tree v16qi_ftype_v16qi_v16qi
12936     = build_function_type_list (V16QI_type_node,
12937 V16QI_type_node, V16QI_type_node, NULL_TREE);
12938   tree v8hi_ftype_v8hi_v8hi
12939     = build_function_type_list (V8HI_type_node,
12940 V8HI_type_node, V8HI_type_node, NULL_TREE);
12941   tree v4si_ftype_v4si_v4si
12942     = build_function_type_list (V4SI_type_node,
12943 V4SI_type_node, V4SI_type_node, NULL_TREE);
12944   tree v2di_ftype_v2di_v2di
12945     = build_function_type_list (V2DI_type_node,
12946 V2DI_type_node, V2DI_type_node, NULL_TREE);
12947   tree v2di_ftype_v2df_v2df
12948     = build_function_type_list (V2DI_type_node,
12949 V2DF_type_node, V2DF_type_node, NULL_TREE);
12950   tree v2df_ftype_v2df
12951     = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
12952   tree v2df_ftype_double
12953     = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
12954   tree v2df_ftype_double_double
12955     = build_function_type_list (V2DF_type_node,
12956 double_type_node, double_type_node, NULL_TREE);
12957   tree int_ftype_v8hi_int
12958     = build_function_type_list (integer_type_node,
12959 V8HI_type_node, integer_type_node, NULL_TREE);
12960   tree v8hi_ftype_v8hi_int_int
12961     = build_function_type_list (V8HI_type_node,
12962 V8HI_type_node, integer_type_node,
12963 integer_type_node, NULL_TREE);
12964   tree v2di_ftype_v2di_int
12965     = build_function_type_list (V2DI_type_node,
12966 V2DI_type_node, integer_type_node, NULL_TREE);
12967   tree v4si_ftype_v4si_int
12968     = build_function_type_list (V4SI_type_node,
12969 V4SI_type_node, integer_type_node, NULL_TREE);
12970   tree v8hi_ftype_v8hi_int
12971     = build_function_type_list (V8HI_type_node,
12972 V8HI_type_node, integer_type_node, NULL_TREE);
12973   tree v8hi_ftype_v8hi_v2di
12974     = build_function_type_list (V8HI_type_node,
12975 V8HI_type_node, V2DI_type_node, NULL_TREE);
12976   tree v4si_ftype_v4si_v2di
12977     = build_function_type_list (V4SI_type_node,
12978 V4SI_type_node, V2DI_type_node, NULL_TREE);
12979   tree v4si_ftype_v8hi_v8hi
12980     = build_function_type_list (V4SI_type_node,
12981 V8HI_type_node, V8HI_type_node, NULL_TREE);
12982   tree di_ftype_v8qi_v8qi
12983     = build_function_type_list (long_long_unsigned_type_node,
12984 V8QI_type_node, V8QI_type_node, NULL_TREE);
12985   tree v2di_ftype_v16qi_v16qi
12986     = build_function_type_list (V2DI_type_node,
12987 V16QI_type_node, V16QI_type_node, NULL_TREE);
12988   tree int_ftype_v16qi
12989     = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
12990   tree v16qi_ftype_pcchar
12991     = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
12992   tree void_ftype_pchar_v16qi
12993     = build_function_type_list (void_type_node,
12994 pchar_type_node, V16QI_type_node, NULL_TREE);
12995   tree v4si_ftype_pcint
12996     = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
12997   tree void_ftype_pcint_v4si
12998     = build_function_type_list (void_type_node,
12999 pcint_type_node, V4SI_type_node, NULL_TREE);
13000   tree v2di_ftype_v2di
13001     = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
/* Table-driven registration: pick each 2-arg builtin's signature from
   the insn's operand[1] machine mode (the switch's case labels were
   dropped by the extraction).  */
13003   /* Add all builtins that are more or less simple operations on two
13005   for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13007       /* Use one of the operands; the target can have a different mode for
13008 mask-generating compares. */
13009       enum machine_mode mode;
13014       mode = insn_data[d->icode].operand[1].mode;
13019 type = v16qi_ftype_v16qi_v16qi;
13022 type = v8hi_ftype_v8hi_v8hi;
13025 type = v4si_ftype_v4si_v4si;
13028 type = v2di_ftype_v2di_v2di;
13031 type = v2df_ftype_v2df_v2df;
13034 type = ti_ftype_ti_ti;
13037 type = v4sf_ftype_v4sf_v4sf;
13040 type = v8qi_ftype_v8qi_v8qi;
13043 type = v4hi_ftype_v4hi_v4hi;
13046 type = v2si_ftype_v2si_v2si;
13049 type = di_ftype_di_di;
/* Compare insns return a mask in a differently-typed vector, so the
   mode-derived signature is overridden here.  */
13056       /* Override for comparisons.  */
13057       if (d->icode == CODE_FOR_maskcmpv4sf3
13058 || d->icode == CODE_FOR_maskncmpv4sf3
13059 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13060 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
13061 type = v4si_ftype_v4sf_v4sf;
13063       if (d->icode == CODE_FOR_maskcmpv2df3
13064 || d->icode == CODE_FOR_maskncmpv2df3
13065 || d->icode == CODE_FOR_vmmaskcmpv2df3
13066 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13067 type = v2di_ftype_v2df_v2df;
13069       def_builtin (d->mask, d->name, type, d->code);
13072   /* Add the remaining MMX insns with somewhat more complicated types.  */
13073   def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13074   def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13075   def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13076   def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13077   def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13079   def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13080   def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13081   def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13083   def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13084   def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13086   def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13087   def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
/* comi/ucomi: SSE2 entries compare double vectors, the rest compare
   float vectors; all return int.  */
13089   /* comi/ucomi insns.  */
13090   for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13091     if (d->mask == MASK_SSE2)
13092       def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13094       def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
13096   def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13097   def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13098   def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
/* SSE1 builtins.  */
13100   def_builtin (MASK_SSE1, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13101   def_builtin (MASK_SSE1, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13102   def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13103   def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13104   def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13105   def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13106   def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13107   def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13109   def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13110   def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13112   def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13114   def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13115   def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13116   def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13117   def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13118   def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13119   def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13121   def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13122   def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13123   def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13124   def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13126   def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13127   def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13128   def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13129   def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13131   def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13133   def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13135   def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13136   def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13137   def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13138   def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13139   def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13140   def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13142   def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13144   /* Original 3DNow!  */
13145   def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13146   def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13147   def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13148   def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13149   def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13150   def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13151   def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13152   def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13153   def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13154   def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13155   def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13156   def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13157   def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13158   def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13159   def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13160   def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13161   def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13162   def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13163   def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13164   def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13166   /* 3DNow! extension as used in the Athlon CPU.  */
13167   def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13168   def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13169   def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13170   def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13171   def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13172   def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13174   def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
/* SSE2 builtins.  */
13177   def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13178   def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13180   def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13181   def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13182   def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13184   def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13185   def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13186   def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
13187   def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13188   def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13189   def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13191   def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13192   def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13193   def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13194   def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13196   def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13197   def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13198   def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13199   def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13200   def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13202   def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13203   def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13204   def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13205   def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13207   def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13208   def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13210   def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13212   def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13213   def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13215   def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13216   def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13217   def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13218   def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13219   def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13221   def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13223   def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13224   def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13226   def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13227   def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13228   def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13230   def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13231   def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13232   def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13234   def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13235   def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13236   def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13237   def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13238   def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13239   def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13240   def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13242   def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13243   def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13244   def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13246   def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13247   def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13248   def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13249   def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13250   def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13251   def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13252   def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13254   def_builtin (MASK_SSE1, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13256   def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13257   def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13258   def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13260   def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13261   def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13262   def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13264   def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13265   def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13267   def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13268   def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13269   def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13270   def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13272   def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13273   def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13274   def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13275   def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13277   def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13278   def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13280   def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13283 /* Errors in the source file can cause expand_expr to return const0_rtx
13284 where we expect a vector. To avoid crashing, use one of the vector
13285 clear instructions. */
/* safe_vector_operand (X, MODE): if X is the const0_rtx error fallback,
   replace it with a freshly cleared register of vector MODE.
   NOTE(review): this excerpt is elided (original lines are missing); the
   return statements and braces fall outside the visible text.  */
13287 safe_vector_operand (x, mode)
13289 enum machine_mode mode;
13291 if (x != const0_rtx)
13293 x = gen_reg_rtx (mode);
/* MMX / 3DNow! modes are cleared through the DImode clear pattern; wrap a
   non-DImode register in a DImode subreg first.  */
13295 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13296 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13297 : gen_rtx_SUBREG (DImode, x, 0)));
/* Otherwise clear through the SSE V4SFmode clear pattern.  */
13299 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13300 : gen_rtx_SUBREG (V4SFmode, x, 0),
13301 CONST0_RTX (V4SFmode)));
13305 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* Expand a two-argument builtin described by insn code ICODE.  ARGLIST
   holds the two tree arguments; TARGET is the suggested result rtx (a new
   register is taken when it does not fit operand 0's mode/predicate).
   Operand modes come from insn_data[icode].  NOTE(review): excerpt is
   elided; the declaration list, some guards, and the final emit/return
   are outside the visible text.  */
13308 ix86_expand_binop_builtin (icode, arglist, target)
13309 enum insn_code icode;
13314 tree arg0 = TREE_VALUE (arglist);
13315 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13316 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13317 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13318 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13319 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13320 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Guard against const0_rtx standing in for a vector (see
   safe_vector_operand).  */
13322 if (VECTOR_MODE_P (mode0))
13323 op0 = safe_vector_operand (op0, mode0);
13324 if (VECTOR_MODE_P (mode1))
13325 op1 = safe_vector_operand (op1, mode1);
13328 || GET_MODE (target) != tmode
13329 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13330 target = gen_reg_rtx (tmode);
13332 /* In case the insn wants input operands in modes different from
13333 the result, abort. */
13334 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
/* Force each input into a register of the expected mode when it fails
   the insn's operand predicate.  */
13337 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13338 op0 = copy_to_mode_reg (mode0, op0);
13339 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13340 op1 = copy_to_mode_reg (mode1, op1);
13342 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13343 yet one of the two must not be a memory. This is normally enforced
13344 by expanders, but we didn't bother to create one here. */
13345 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13346 op0 = copy_to_mode_reg (mode0, op0);
13348 pat = GEN_FCN (icode) (target, op0, op1);
13355 /* Subroutine of ix86_expand_builtin to take care of stores. */
/* Expand a store builtin: arg0 is the destination address, arg1 the value.
   The address is wrapped in a MEM of operand 0's mode; the value is forced
   into a register if it fails operand 1's predicate.  NOTE(review):
   excerpt is elided; local declarations and the final emit/return are
   outside the visible text.  */
13358 ix86_expand_store_builtin (icode, arglist)
13359 enum insn_code icode;
13363 tree arg0 = TREE_VALUE (arglist);
13364 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13365 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13366 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13367 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13368 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13370 if (VECTOR_MODE_P (mode1))
13371 op1 = safe_vector_operand (op1, mode1);
/* Turn the pointer argument into a MEM through a Pmode register.  */
13373 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13375 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13376 op1 = copy_to_mode_reg (mode1, op1);
13378 pat = GEN_FCN (icode) (op0, op1);
13384 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
/* Expand a one-argument builtin.  When DO_LOAD is nonzero the argument is
   a pointer and is dereferenced through a MEM of operand 1's mode (used
   for the load builtins).  NOTE(review): excerpt is elided; parameter
   declarations and the final emit/return are outside the visible text.  */
13387 ix86_expand_unop_builtin (icode, arglist, target, do_load)
13388 enum insn_code icode;
13394 tree arg0 = TREE_VALUE (arglist);
13395 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13396 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13397 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13400 || GET_MODE (target) != tmode
13401 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13402 target = gen_reg_rtx (tmode);
/* do_load path: treat op0 as an address and load through it.  */
13404 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13407 if (VECTOR_MODE_P (mode0))
13408 op0 = safe_vector_operand (op0, mode0);
13410 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13411 op0 = copy_to_mode_reg (mode0, op0);
13414 pat = GEN_FCN (icode) (target, op0);
13421 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13422 sqrtss, rsqrtss, rcpss. */
/* These vm* patterns take the same value twice (operands 1 and 2): the
   scalar op is applied to the low element and the upper elements are
   passed through.  NOTE(review): excerpt is elided; the line assigning
   op1 from op0 and the final emit/return are outside the visible text.  */
13425 ix86_expand_unop1_builtin (icode, arglist, target)
13426 enum insn_code icode;
13431 tree arg0 = TREE_VALUE (arglist);
13432 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13433 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13434 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13437 || GET_MODE (target) != tmode
13438 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13439 target = gen_reg_rtx (tmode);
13441 if (VECTOR_MODE_P (mode0))
13442 op0 = safe_vector_operand (op0, mode0);
13444 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13445 op0 = copy_to_mode_reg (mode0, op0);
/* op1 carries the same value as op0 (operand 2 uses mode0 as well).  */
13448 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13449 op1 = copy_to_mode_reg (mode0, op1);
13451 pat = GEN_FCN (icode) (target, op0, op1);
13458 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* Expand an SSE/SSE2 mask-compare builtin described by D (insn code plus
   rtx comparison code).  Produces a full-width mask result via the
   maskcmp patterns.  NOTE(review): excerpt is elided; the operand-swap
   branch, declarations, and the final emit/return are outside the
   visible text.  */
13461 ix86_expand_sse_compare (d, arglist, target)
13462 const struct builtin_description *d;
13467 tree arg0 = TREE_VALUE (arglist);
13468 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13469 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13470 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13472 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13473 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13474 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13475 enum rtx_code comparison = d->comparison;
13477 if (VECTOR_MODE_P (mode0))
13478 op0 = safe_vector_operand (op0, mode0);
13479 if (VECTOR_MODE_P (mode1))
13480 op1 = safe_vector_operand (op1, mode1);
13482 /* Swap operands if we have a comparison that isn't available in
/* (swap branch: copies op1 into a scratch so the operands can be
   exchanged -- remainder elided from this excerpt)  */
13486 rtx tmp = gen_reg_rtx (mode1);
13487 emit_move_insn (tmp, op1);
13493 || GET_MODE (target) != tmode
13494 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13495 target = gen_reg_rtx (tmode);
13497 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13498 op0 = copy_to_mode_reg (mode0, op0);
13499 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13500 op1 = copy_to_mode_reg (mode1, op1);
/* op2 is the comparison rtx itself; the maskcmp patterns take it as a
   fourth operand.  */
13502 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13503 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13510 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* Expand a comiss/ucomiss-style builtin: emit the compare, then set the
   low byte of an SImode pseudo from the resulting flags and return the
   SImode register (0/1 result).  NOTE(review): excerpt is elided; the
   operand-swap branch and several lines around the final SET are outside
   the visible text.  */
13513 ix86_expand_sse_comi (d, arglist, target)
13514 const struct builtin_description *d;
13519 tree arg0 = TREE_VALUE (arglist);
13520 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13521 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13522 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13524 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13525 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13526 enum rtx_code comparison = d->comparison;
13528 if (VECTOR_MODE_P (mode0))
13529 op0 = safe_vector_operand (op0, mode0);
13530 if (VECTOR_MODE_P (mode1))
13531 op1 = safe_vector_operand (op1, mode1);
13533 /* Swap operands if we have a comparison that isn't available in
/* Result register: SImode zeroed, then written through a QImode subreg
   via STRICT_LOW_PART below.  */
13542 target = gen_reg_rtx (SImode);
13543 emit_move_insn (target, const0_rtx);
13544 target = gen_rtx_SUBREG (QImode, target, 0);
13546 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13547 op0 = copy_to_mode_reg (mode0, op0);
13548 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13549 op1 = copy_to_mode_reg (mode1, op1);
13551 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13552 pat = GEN_FCN (d->icode) (op0, op1);
/* Materialize the flags comparison into the low byte of the result.  */
13556 emit_insn (gen_rtx_SET (VOIDmode,
13557 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13558 gen_rtx_fmt_ee (comparison, QImode,
/* Return the containing SImode register, not the QImode subreg.  */
13562 return SUBREG_REG (target);
13565 /* Expand an expression EXP that calls a built-in function,
13566 with result going to TARGET if that's convenient
13567 (and in mode MODE if that's convenient).
13568 SUBTARGET may be used as the target for computing one of EXP's operands.
13569 IGNORE is nonzero if the value is to be ignored. */
/* Main MMX/SSE/SSE2/3DNow! builtin expander.  Dispatches on the builtin's
   function code: special-cased builtins are handled in the switch below;
   anything else is looked up in the bdesc_2arg / bdesc_1arg / bdesc_comi
   tables at the bottom and routed through the generic subroutines above.
   NOTE(review): this excerpt is elided -- switch braces, `break`s,
   `return`s, and some intermediate lines are outside the visible text.  */
13572 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
13575 rtx subtarget ATTRIBUTE_UNUSED;
13576 enum machine_mode mode ATTRIBUTE_UNUSED;
13577 int ignore ATTRIBUTE_UNUSED;
13579 const struct builtin_description *d;
13581 enum insn_code icode;
13582 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13583 tree arglist = TREE_OPERAND (exp, 1);
13584 tree arg0, arg1, arg2;
13585 rtx op0, op1, op2, pat;
13586 enum machine_mode tmode, mode0, mode1, mode2;
13587 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
/* --- No-operand builtins --- */
13591 case IX86_BUILTIN_EMMS:
13592 emit_insn (gen_emms ());
13595 case IX86_BUILTIN_SFENCE:
13596 emit_insn (gen_sfence ());
/* --- pextrw: vector in, immediate selector, integer out --- */
13599 case IX86_BUILTIN_PEXTRW:
13600 case IX86_BUILTIN_PEXTRW128:
13601 icode = (fcode == IX86_BUILTIN_PEXTRW
13602 ? CODE_FOR_mmx_pextrw
13603 : CODE_FOR_sse2_pextrw);
13604 arg0 = TREE_VALUE (arglist);
13605 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13606 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13607 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13608 tmode = insn_data[icode].operand[0].mode;
13609 mode0 = insn_data[icode].operand[1].mode;
13610 mode1 = insn_data[icode].operand[2].mode;
13612 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13613 op0 = copy_to_mode_reg (mode0, op0);
13614 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13616 /* @@@ better error message */
13617 error ("selector must be an immediate");
/* Return a dummy register so expansion can continue after the error.  */
13618 return gen_reg_rtx (tmode);
13621 || GET_MODE (target) != tmode
13622 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13623 target = gen_reg_rtx (tmode);
13624 pat = GEN_FCN (icode) (target, op0, op1);
/* --- pinsrw: vector, integer, immediate selector --- */
13630 case IX86_BUILTIN_PINSRW:
13631 case IX86_BUILTIN_PINSRW128:
13632 icode = (fcode == IX86_BUILTIN_PINSRW
13633 ? CODE_FOR_mmx_pinsrw
13634 : CODE_FOR_sse2_pinsrw);
13635 arg0 = TREE_VALUE (arglist);
13636 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13637 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13638 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13639 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13640 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13641 tmode = insn_data[icode].operand[0].mode;
13642 mode0 = insn_data[icode].operand[1].mode;
13643 mode1 = insn_data[icode].operand[2].mode;
13644 mode2 = insn_data[icode].operand[3].mode;
13646 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13647 op0 = copy_to_mode_reg (mode0, op0);
13648 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13649 op1 = copy_to_mode_reg (mode1, op1);
13650 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13652 /* @@@ better error message */
13653 error ("selector must be an immediate");
13657 || GET_MODE (target) != tmode
13658 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13659 target = gen_reg_rtx (tmode);
13660 pat = GEN_FCN (icode) (target, op0, op1, op2);
/* --- maskmovq/maskmovdqu: three inputs, no result --- */
13666 case IX86_BUILTIN_MASKMOVQ:
13667 case IX86_BUILTIN_MASKMOVDQU:
13668 icode = (fcode == IX86_BUILTIN_MASKMOVQ
13669 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
13670 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
13671 : CODE_FOR_sse2_maskmovdqu));
13672 /* Note the arg order is different from the operand order. */
13673 arg1 = TREE_VALUE (arglist);
13674 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
13675 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13676 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13677 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13678 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13679 mode0 = insn_data[icode].operand[0].mode;
13680 mode1 = insn_data[icode].operand[1].mode;
13681 mode2 = insn_data[icode].operand[2].mode;
13683 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13684 op0 = copy_to_mode_reg (mode0, op0);
13685 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13686 op1 = copy_to_mode_reg (mode1, op1);
13687 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
13688 op2 = copy_to_mode_reg (mode2, op2);
13689 pat = GEN_FCN (icode) (op0, op1, op2);
/* --- Scalar SSE unops and SSE load/store forwarded to subroutines --- */
13695 case IX86_BUILTIN_SQRTSS:
13696 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
13697 case IX86_BUILTIN_RSQRTSS:
13698 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
13699 case IX86_BUILTIN_RCPSS:
13700 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
13702 case IX86_BUILTIN_LOADAPS:
13703 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
13705 case IX86_BUILTIN_LOADUPS:
13706 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
13708 case IX86_BUILTIN_STOREAPS:
13709 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
13711 case IX86_BUILTIN_STOREUPS:
13712 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
13714 case IX86_BUILTIN_LOADSS:
13715 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
13717 case IX86_BUILTIN_STORESS:
13718 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
/* --- movhps/movlps/movhpd/movlpd loads: vector + pointer --- */
13720 case IX86_BUILTIN_LOADHPS:
13721 case IX86_BUILTIN_LOADLPS:
13722 case IX86_BUILTIN_LOADHPD:
13723 case IX86_BUILTIN_LOADLPD:
13724 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
13725 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
13726 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
13727 : CODE_FOR_sse2_movlpd);
13728 arg0 = TREE_VALUE (arglist);
13729 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13730 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13731 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13732 tmode = insn_data[icode].operand[0].mode;
13733 mode0 = insn_data[icode].operand[1].mode;
13734 mode1 = insn_data[icode].operand[2].mode;
13736 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13737 op0 = copy_to_mode_reg (mode0, op0);
13738 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
13740 || GET_MODE (target) != tmode
13741 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13742 target = gen_reg_rtx (tmode);
13743 pat = GEN_FCN (icode) (target, op0, op1);
/* --- Matching stores: pointer + vector, high/low half written --- */
13749 case IX86_BUILTIN_STOREHPS:
13750 case IX86_BUILTIN_STORELPS:
13751 case IX86_BUILTIN_STOREHPD:
13752 case IX86_BUILTIN_STORELPD:
13753 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
13754 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
13755 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
13756 : CODE_FOR_sse2_movlpd);
13757 arg0 = TREE_VALUE (arglist);
13758 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13759 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13760 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13761 mode0 = insn_data[icode].operand[1].mode;
13762 mode1 = insn_data[icode].operand[2].mode;
13764 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13765 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13766 op1 = copy_to_mode_reg (mode1, op1);
/* op0 appears twice: the insn pattern reads and writes the memory.  */
13768 pat = GEN_FCN (icode) (op0, op0, op1);
13774 case IX86_BUILTIN_MOVNTPS:
13775 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
13776 case IX86_BUILTIN_MOVNTQ:
13777 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
/* --- MXCSR access goes through a stack slot --- */
13779 case IX86_BUILTIN_LDMXCSR:
13780 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13781 target = assign_386_stack_local (SImode, 0);
13782 emit_move_insn (target, op0);
13783 emit_insn (gen_ldmxcsr (target));
13786 case IX86_BUILTIN_STMXCSR:
13787 target = assign_386_stack_local (SImode, 0);
13788 emit_insn (gen_stmxcsr (target));
13789 return copy_to_mode_reg (SImode, target);
/* --- shufps/shufpd: two vectors + immediate mask --- */
13791 case IX86_BUILTIN_SHUFPS:
13792 case IX86_BUILTIN_SHUFPD:
13793 icode = (fcode == IX86_BUILTIN_SHUFPS
13794 ? CODE_FOR_sse_shufps
13795 : CODE_FOR_sse2_shufpd);
13796 arg0 = TREE_VALUE (arglist);
13797 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13798 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13799 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13800 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13801 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13802 tmode = insn_data[icode].operand[0].mode;
13803 mode0 = insn_data[icode].operand[1].mode;
13804 mode1 = insn_data[icode].operand[2].mode;
13805 mode2 = insn_data[icode].operand[3].mode;
13807 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13808 op0 = copy_to_mode_reg (mode0, op0);
13809 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13810 op1 = copy_to_mode_reg (mode1, op1);
13811 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13813 /* @@@ better error message */
13814 error ("mask must be an immediate");
13815 return gen_reg_rtx (tmode);
13818 || GET_MODE (target) != tmode
13819 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13820 target = gen_reg_rtx (tmode);
13821 pat = GEN_FCN (icode) (target, op0, op1, op2);
/* --- pshufw/pshufd/pshufhw/pshuflw: one vector + immediate mask --- */
13827 case IX86_BUILTIN_PSHUFW:
13828 case IX86_BUILTIN_PSHUFD:
13829 case IX86_BUILTIN_PSHUFHW:
13830 case IX86_BUILTIN_PSHUFLW:
13831 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
13832 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
13833 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
13834 : CODE_FOR_mmx_pshufw);
13835 arg0 = TREE_VALUE (arglist);
13836 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13837 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13838 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13839 tmode = insn_data[icode].operand[0].mode;
13840 mode1 = insn_data[icode].operand[1].mode;
13841 mode2 = insn_data[icode].operand[2].mode;
13843 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13844 op0 = copy_to_mode_reg (mode1, op0);
13845 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13847 /* @@@ better error message */
13848 error ("mask must be an immediate");
13852 || GET_MODE (target) != tmode
13853 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13854 target = gen_reg_rtx (tmode);
13855 pat = GEN_FCN (icode) (target, op0, op1);
/* --- 128-bit byte shifts: TImode patterns accessed through subregs --- */
13861 case IX86_BUILTIN_PSLLDQI128:
13862 case IX86_BUILTIN_PSRLDQI128:
13863 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
13864 : CODE_FOR_sse2_lshrti3);
13865 arg0 = TREE_VALUE (arglist);
13866 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13867 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13868 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13869 tmode = insn_data[icode].operand[0].mode;
13870 mode1 = insn_data[icode].operand[1].mode;
13871 mode2 = insn_data[icode].operand[2].mode;
13873 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13875 op0 = copy_to_reg (op0);
13876 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
13878 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13880 error ("shift must be an immediate");
13883 target = gen_reg_rtx (V2DImode);
13884 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
/* --- 3DNow! builtins --- */
13890 case IX86_BUILTIN_FEMMS:
13891 emit_insn (gen_femms ());
13894 case IX86_BUILTIN_PAVGUSB:
13895 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
13897 case IX86_BUILTIN_PF2ID:
13898 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
13900 case IX86_BUILTIN_PFACC:
13901 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
13903 case IX86_BUILTIN_PFADD:
13904 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
13906 case IX86_BUILTIN_PFCMPEQ:
13907 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
13909 case IX86_BUILTIN_PFCMPGE:
13910 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
13912 case IX86_BUILTIN_PFCMPGT:
13913 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
13915 case IX86_BUILTIN_PFMAX:
13916 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
13918 case IX86_BUILTIN_PFMIN:
13919 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
13921 case IX86_BUILTIN_PFMUL:
13922 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
13924 case IX86_BUILTIN_PFRCP:
13925 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
13927 case IX86_BUILTIN_PFRCPIT1:
13928 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
13930 case IX86_BUILTIN_PFRCPIT2:
13931 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
13933 case IX86_BUILTIN_PFRSQIT1:
13934 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
13936 case IX86_BUILTIN_PFRSQRT:
13937 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
13939 case IX86_BUILTIN_PFSUB:
13940 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
13942 case IX86_BUILTIN_PFSUBR:
13943 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
13945 case IX86_BUILTIN_PI2FD:
13946 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
13948 case IX86_BUILTIN_PMULHRW:
13949 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
13951 case IX86_BUILTIN_PF2IW:
13952 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
13954 case IX86_BUILTIN_PFNACC:
13955 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
13957 case IX86_BUILTIN_PFPNACC:
13958 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
13960 case IX86_BUILTIN_PI2FW:
13961 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
13963 case IX86_BUILTIN_PSWAPDSI:
13964 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
13966 case IX86_BUILTIN_PSWAPDSF:
13967 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
/* --- Zeroing builtins --- */
13969 case IX86_BUILTIN_SSE_ZERO:
13970 target = gen_reg_rtx (V4SFmode);
13971 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
13974 case IX86_BUILTIN_MMX_ZERO:
13975 target = gen_reg_rtx (DImode);
13976 emit_insn (gen_mmx_clrdi (target));
13979 case IX86_BUILTIN_CLRTI:
13980 target = gen_reg_rtx (V2DImode);
13981 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
/* --- SSE2 double-precision builtins --- */
13985 case IX86_BUILTIN_SQRTSD:
13986 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
13987 case IX86_BUILTIN_LOADAPD:
13988 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
13989 case IX86_BUILTIN_LOADUPD:
13990 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
13992 case IX86_BUILTIN_STOREAPD:
13993 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
13994 case IX86_BUILTIN_STOREUPD:
13995 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
13997 case IX86_BUILTIN_LOADSD:
13998 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14000 case IX86_BUILTIN_STORESD:
14001 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
/* setpd1/setpd build the vector through a stack temporary, then load and
   (for setpd1) broadcast with shufpd.  */
14003 case IX86_BUILTIN_SETPD1:
14004 target = assign_386_stack_local (DFmode, 0);
14005 arg0 = TREE_VALUE (arglist);
14006 emit_move_insn (adjust_address (target, DFmode, 0),
14007 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14008 op0 = gen_reg_rtx (V2DFmode);
14009 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
14010 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
14013 case IX86_BUILTIN_SETPD:
14014 target = assign_386_stack_local (V2DFmode, 0);
14015 arg0 = TREE_VALUE (arglist);
14016 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14017 emit_move_insn (adjust_address (target, DFmode, 0),
14018 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14019 emit_move_insn (adjust_address (target, DFmode, 8),
14020 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14021 op0 = gen_reg_rtx (V2DFmode);
14022 emit_insn (gen_sse2_movapd (op0, target));
/* loadrpd/loadpd1: load then swap/duplicate halves via shufpd.  */
14025 case IX86_BUILTIN_LOADRPD:
14026 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14027 gen_reg_rtx (V2DFmode), 1);
14028 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
14031 case IX86_BUILTIN_LOADPD1:
14032 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14033 gen_reg_rtx (V2DFmode), 1);
14034 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14037 case IX86_BUILTIN_STOREPD1:
14038 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14039 case IX86_BUILTIN_STORERPD:
14040 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14042 case IX86_BUILTIN_CLRPD:
14043 target = gen_reg_rtx (V2DFmode);
14044 emit_insn (gen_sse_clrv2df (target));
/* --- SSE2 fences and cache control --- */
14047 case IX86_BUILTIN_MFENCE:
14048 emit_insn (gen_sse2_mfence ());
14050 case IX86_BUILTIN_LFENCE:
14051 emit_insn (gen_sse2_lfence ());
14054 case IX86_BUILTIN_CLFLUSH:
14055 arg0 = TREE_VALUE (arglist);
14056 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14057 icode = CODE_FOR_sse2_clflush;
14058 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14059 op0 = copy_to_mode_reg (Pmode, op0);
14061 emit_insn (gen_sse2_clflush (op0));
14064 case IX86_BUILTIN_MOVNTPD:
14065 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14066 case IX86_BUILTIN_MOVNTDQ:
14067 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14068 case IX86_BUILTIN_MOVNTI:
14069 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14071 case IX86_BUILTIN_LOADDQA:
14072 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14073 case IX86_BUILTIN_LOADDQU:
14074 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14075 case IX86_BUILTIN_LOADD:
14076 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14078 case IX86_BUILTIN_STOREDQA:
14079 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14080 case IX86_BUILTIN_STOREDQU:
14081 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14082 case IX86_BUILTIN_STORED:
14083 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
/* --- Table-driven fallback for all remaining builtins --- */
14089 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14090 if (d->code == fcode)
14092 /* Compares are treated specially. */
14093 if (d->icode == CODE_FOR_maskcmpv4sf3
14094 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14095 || d->icode == CODE_FOR_maskncmpv4sf3
14096 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14097 || d->icode == CODE_FOR_maskcmpv2df3
14098 || d->icode == CODE_FOR_vmmaskcmpv2df3
14099 || d->icode == CODE_FOR_maskncmpv2df3
14100 || d->icode == CODE_FOR_vmmaskncmpv2df3)
14101 return ix86_expand_sse_compare (d, arglist, target);
14103 return ix86_expand_binop_builtin (d->icode, arglist, target);
14106 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14107 if (d->code == fcode)
14108 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14110 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14111 if (d->code == fcode)
14112 return ix86_expand_sse_comi (d, arglist, target);
14114 /* @@@ Should really do something sensible here. */
14118 /* Store OPERAND to the memory after reload is completed. This means
14119 that we can't easily use assign_stack_local. */
/* Returns a MEM holding OPERAND.  With a red zone (64-bit), store below
   the stack pointer; otherwise push onto the stack with PRE_DEC and
   return a MEM at the new stack pointer.  NOTE(review): excerpt is
   elided; the mode switch, braces, and the final return are outside the
   visible text.  */
14121 ix86_force_to_memory (mode, operand)
14122 enum machine_mode mode;
14126 if (!reload_completed)
14128 if (TARGET_64BIT && TARGET_RED_ZONE)
/* Red zone: the area below the stack pointer is safe to use without
   adjusting rsp.  */
14130 result = gen_rtx_MEM (mode,
14131 gen_rtx_PLUS (Pmode,
14133 GEN_INT (-RED_ZONE_SIZE)));
14134 emit_move_insn (result, operand);
14136 else if (TARGET_64BIT && !TARGET_RED_ZONE)
/* 64-bit without red zone: push a DImode lowpart.  */
14142 operand = gen_lowpart (DImode, operand);
14146 gen_rtx_SET (VOIDmode,
14147 gen_rtx_MEM (DImode,
14148 gen_rtx_PRE_DEC (DImode,
14149 stack_pointer_rtx)),
14155 result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* 32-bit DImode values are pushed as two SImode halves.  */
14164 split_di (&operand, 1, operands, operands + 1);
14166 gen_rtx_SET (VOIDmode,
14167 gen_rtx_MEM (SImode,
14168 gen_rtx_PRE_DEC (Pmode,
14169 stack_pointer_rtx)),
14172 gen_rtx_SET (VOIDmode,
14173 gen_rtx_MEM (SImode,
14174 gen_rtx_PRE_DEC (Pmode,
14175 stack_pointer_rtx)),
14180 /* It is better to store HImodes as SImodes. */
14181 if (!TARGET_PARTIAL_REG_STALL)
14182 operand = gen_lowpart (SImode, operand);
14186 gen_rtx_SET (VOIDmode,
14187 gen_rtx_MEM (GET_MODE (operand),
14188 gen_rtx_PRE_DEC (SImode,
14189 stack_pointer_rtx)),
14195 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14200 /* Free operand from the memory. */
/* Undo ix86_force_to_memory: pop the temporary off the stack by bumping
   the stack pointer (no-op in the red-zone case, which never pushed).
   NOTE(review): excerpt is elided; the size computation assigned between
   the mode tests and the closing brace are outside the visible text.  */
14202 ix86_free_from_memory (mode)
14203 enum machine_mode mode;
14205 if (!TARGET_64BIT || !TARGET_RED_ZONE)
/* Pick the number of bytes that the matching push sequence consumed.  */
14209 if (mode == DImode || TARGET_64BIT)
14211 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14215 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14216 to pop or add instruction if registers are available. */
14217 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14218 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14223 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14224 QImode must go into class Q_REGS.
14225 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
14226 movdf to do mem-to-mem moves through integer regs. */
/* PREFERRED_RELOAD_CLASS target hook body: given value X and requested
   CLASS, return the class reload should actually use.  NOTE(review):
   excerpt is elided; several return statements between the tests are
   outside the visible text.  */
14228 ix86_preferred_reload_class (x, class)
14230 enum reg_class class;
14232 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14234 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14236 /* SSE can't load any constant directly yet. */
14237 if (SSE_CLASS_P (class))
14239 /* Floats can load 0 and 1. */
14240 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14242 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14243 if (MAYBE_SSE_CLASS_P (class))
14244 return (reg_class_subset_p (class, GENERAL_REGS)
14245 ? GENERAL_REGS : FLOAT_REGS);
14249 /* General regs can load everything. */
14250 if (reg_class_subset_p (class, GENERAL_REGS))
14251 return GENERAL_REGS;
14252 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14253 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
/* MMX classes cannot hold constants directly either.  */
14256 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
/* QImode values must live in byte-addressable registers.  */
14258 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14263 /* If we are copying between general and FP registers, we need a memory
14264 location. The same is true for SSE and MMX registers.
14266 The macro can't work reliably when one of the CLASSES is class containing
14267 registers from multiple units (SSE, MMX, integer). We avoid this by never
14268 combining those units in single alternative in the machine description.
14269 Ensure that this constraint holds to avoid unexpected surprises.
14271 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14272 enforce these sanity checks. */
/* Return nonzero when a CLASS1 -> CLASS2 copy of MODE must go through
   memory.  NOTE(review): excerpt is elided; the strict-mode abort between
   the sanity check and the return is outside the visible text.  */
14274 ix86_secondary_memory_needed (class1, class2, mode, strict)
14275 enum reg_class class1, class2;
14276 enum machine_mode mode;
/* Sanity: each class must be unambiguously FLOAT, SSE, or MMX (or none);
   mixed-unit classes would make the answer unreliable.  */
14279 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14280 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14281 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14282 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14283 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14284 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
/* Memory is needed when crossing the FP boundary, or the SSE/MMX
   boundary in any mode other than SImode (SImode can use movd).  */
14291 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14292 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14293 && (mode) != SImode)
14294 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14295 && (mode) != SImode));
14297 /* Return the cost of moving data from a register in class CLASS1 to
14298 one in class CLASS2.
14300 It is not required that the cost always equal 2 when FROM is the same as TO;
14301 on some machines it is expensive to move between registers if they are not
14302 general registers. */
/* NOTE(review): extract incomplete -- return type, braces, the `int cost'
   local declaration and several return statements are outside view.
   Code lines left byte-identical.  */
14304 ix86_register_move_cost (mode, class1, class2)
14305 enum machine_mode mode;
14306 enum reg_class class1, class2;
14308 /* In case we require secondary memory, compute cost of the store followed
14309 by load. In order to avoid bad register allocation choices, we need
14310 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
/* STRICT==0: called from cost estimation, skip the sanity aborts.  */
14312 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
/* Cost of the round trip = worst-case store of class1 + worst-case
   load into class2 (taking MAX over the in/out direction flag).  */
14316 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14317 MEMORY_MOVE_COST (mode, class1, 1));
14318 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14319 MEMORY_MOVE_COST (mode, class2, 1));
14321 /* In case of copying from general_purpose_register we may emit multiple
14322 stores followed by single load causing memory size mismatch stall.
14323 Count this as arbitrarily high cost of 20. */
14324 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14327 /* In the case of FP/MMX moves, the registers actually overlap, and we
14328 have to switch modes in order to treat them differently. */
14329 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14330 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14336 /* Moves between SSE/MMX and integer unit are expensive. */
14337 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14338 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14339 return ix86_cost->mmxsse_to_integer;
/* Remaining cases: per-unit intra-class move costs from the active
   processor cost table.  */
14340 if (MAYBE_FLOAT_CLASS_P (class1))
14341 return ix86_cost->fp_move;
14342 if (MAYBE_SSE_CLASS_P (class1))
14343 return ix86_cost->sse_move;
14344 if (MAYBE_MMX_CLASS_P (class1))
14345 return ix86_cost->mmx_move;
14349 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
/* NOTE(review): extract incomplete -- return type, `int regno'
   declaration, braces and a few `return 0/1' lines are not visible.  */
14351 ix86_hard_regno_mode_ok (regno, mode)
14353 enum machine_mode mode;
14355 /* Flags and only flags can only hold CCmode values. */
14356 if (CC_REGNO_P (regno))
14357 return GET_MODE_CLASS (mode) == MODE_CC
/* Non-flags registers never hold CC/RANDOM/PARTIAL_INT modes.  */
14358 if (GET_MODE_CLASS (mode) == MODE_CC
14359 || GET_MODE_CLASS (mode) == MODE_RANDOM
14360 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
/* Per-unit validity checks for x87, SSE and MMX register files.  */
14362 if (FP_REGNO_P (regno))
14363 return VALID_FP_MODE_P (mode);
14364 if (SSE_REGNO_P (regno))
14365 return VALID_SSE_REG_MODE (mode);
14366 if (MMX_REGNO_P (regno))
14367 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
14368 /* We handle both integer and floats in the general purpose registers.
14369 In future we should be able to handle vector modes as well. */
14370 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14372 /* Take care for QImode values - they can be in non-QI regs, but then
14373 they do cause partial register stalls. */
14374 if (regno < 4 || mode != QImode || TARGET_64BIT)
/* During reload we must accept QI in any reg; otherwise only when the
   target does not suffer partial register stalls.  */
14376 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14379 /* Return the cost of moving data of mode M between a
14380 register and memory. A value of 2 is the default; this cost is
14381 relative to those in `REGISTER_MOVE_COST'.
14383 If moving between registers and memory is more expensive than
14384 between two registers, you should define this macro to express the
14387 Model also increased moving costs of QImode registers in non
/* NOTE(review): extract heavily incomplete -- return type, `int in'
   declaration, braces, the `index' computations and most `switch' case
   labels (on GET_MODE_SIZE) are missing; only representative return
   expressions survive.  Code left byte-identical.  */
14391 ix86_memory_move_cost (mode, class, in)
14392 enum machine_mode mode;
14393 enum reg_class class;
/* x87 loads/stores, indexed by (missing) size-derived table index.  */
14396 if (FLOAT_CLASS_P (class))
14414 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14416 if (SSE_CLASS_P (class))
14419 switch (GET_MODE_SIZE (mode))
14433 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14435 if (MMX_CLASS_P (class))
14438 switch (GET_MODE_SIZE (mode))
14449 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
/* Integer classes: dispatch on operand size.  */
14451 switch (GET_MODE_SIZE (mode))
/* Byte case: a Q-class reg loads bytes directly; other regs need a
   movzbl (load) or pay an extra penalty of 4 on store.  */
14455 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14456 : ix86_cost->movzbl_load);
14458 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14459 : ix86_cost->int_store[0] + 4);
14462 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14464 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14465 if (mode == TFmode)
14467 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14468 * ((int) GET_MODE_SIZE (mode)
14469 + UNITS_PER_WORD -1 ) / UNITS_PER_WORD);
14473 /* Compute a (partial) cost for rtx X. Return true if the complete
14474 cost has been computed, and false if subexpressions should be
14475 scanned. In either case, *TOTAL contains the cost result. */
/* NOTE(review): extract heavily incomplete -- return type, `rtx x' and
   `int *total' declarations, braces, ALL of the `case' labels of the
   big switch on CODE (CONST_INT, ZERO_EXTEND, MULT, PLUS, ...) and the
   true/false return statements are missing from this view.  The
   branches below are annotated from the visible cost-table accesses;
   verify against the full source before relying on them.  */
14478 ix86_rtx_costs (x, code, outer_code, total)
14480 int code, outer_code;
14483 enum machine_mode mode = GET_MODE (x);
/* Constant operands: cost depends on whether the value fits the
   sign/zero-extended immediate forms on x86-64, and on PIC symbols.  */
14491 if (TARGET_64BIT && !x86_64_sign_extended_value (x))
14493 else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
14495 else if (flag_pic && SYMBOLIC_CONST (x))
14502 if (mode == VOIDmode)
/* FP constants loadable by the 80387 (0.0, 1.0, ...) are cheap.  */
14505 switch (standard_80387_constant_p (x))
14514 /* Start with (MEM (SYMBOL_REF)), since that's where
14515 it'll probably end up. Add a penalty for size. */
14516 *total = (COSTS_N_INSNS (1)
14518 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
14524 /* The zero extensions is often completely free on x86_64, so make
14525 it as cheap as possible. */
14526 if (TARGET_64BIT && mode == DImode
14527 && GET_MODE (XEXP (x, 0)) == SImode)
14529 else if (TARGET_ZERO_EXTEND_WITH_AND)
14530 *total = COSTS_N_INSNS (ix86_cost->add);
14532 *total = COSTS_N_INSNS (ix86_cost->movzx);
/* SIGN_EXTEND case (label missing from extract).  */
14536 *total = COSTS_N_INSNS (ix86_cost->movsx);
/* Shift cases: constant shift counts, small left shifts convertible
   to LEA, and the expensive !TARGET_64BIT DImode double-word shifts.  */
14540 if (GET_CODE (XEXP (x, 1)) == CONST_INT
14541 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
14543 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14546 *total = COSTS_N_INSNS (ix86_cost->add);
14549 if ((value == 2 || value == 3)
14550 && !TARGET_DECOMPOSE_LEA
14551 && ix86_cost->lea <= ix86_cost->shift_const)
14553 *total = COSTS_N_INSNS (ix86_cost->lea);
14563 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
14565 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14567 if (INTVAL (XEXP (x, 1)) > 32)
14568 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
14570 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
14574 if (GET_CODE (XEXP (x, 1)) == AND)
14575 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
14577 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
14582 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14583 *total = COSTS_N_INSNS (ix86_cost->shift_const);
14585 *total = COSTS_N_INSNS (ix86_cost->shift_var);
/* MULT case: FP multiply, or integer multiply whose cost grows with
   the number of set bits in a constant multiplier.  */
14590 if (FLOAT_MODE_P (mode))
14591 *total = COSTS_N_INSNS (ix86_cost->fmul);
14592 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14594 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
/* Count bits in the multiplier (nbits increment missing from view).  */
14597 for (nbits = 0; value != 0; value >>= 1)
14600 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
14601 + nbits * ix86_cost->mult_bit);
14605 /* This is arbitrary */
14606 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
14607 + 7 * ix86_cost->mult_bit);
/* DIV/MOD case.  */
14615 if (FLOAT_MODE_P (mode))
14616 *total = COSTS_N_INSNS (ix86_cost->fdiv);
14618 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
/* PLUS case: recognize (plus (plus (mult X c) Y) const) and similar
   shapes that a single LEA can compute.  */
14622 if (FLOAT_MODE_P (mode))
14623 *total = COSTS_N_INSNS (ix86_cost->fadd);
14624 else if (!TARGET_DECOMPOSE_LEA
14625 && GET_MODE_CLASS (mode) == MODE_INT
14626 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
14628 if (GET_CODE (XEXP (x, 0)) == PLUS
14629 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14630 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
14631 && CONSTANT_P (XEXP (x, 1)))
14633 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
14634 if (val == 2 || val == 4 || val == 8)
14636 *total = COSTS_N_INSNS (ix86_cost->lea);
14637 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
14638 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
14640 *total += rtx_cost (XEXP (x, 1), outer_code);
14644 else if (GET_CODE (XEXP (x, 0)) == MULT
14645 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
14647 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
14648 if (val == 2 || val == 4 || val == 8)
14650 *total = COSTS_N_INSNS (ix86_cost->lea);
14651 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
14652 *total += rtx_cost (XEXP (x, 1), outer_code);
14656 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14658 *total = COSTS_N_INSNS (ix86_cost->lea);
14659 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
14660 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
14661 *total += rtx_cost (XEXP (x, 1), outer_code);
/* MINUS (falls through to PLUS-like handling for FP).  */
14668 if (FLOAT_MODE_P (mode))
14670 *total = COSTS_N_INSNS (ix86_cost->fadd);
/* AND/IOR/XOR on 32-bit DImode: two word operations; shift weight by
   whether each operand is itself DImode.  */
14678 if (!TARGET_64BIT && mode == DImode)
14680 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
14681 + (rtx_cost (XEXP (x, 0), outer_code)
14682 << (GET_MODE (XEXP (x, 0)) != DImode))
14683 + (rtx_cost (XEXP (x, 1), outer_code)
14684 << (GET_MODE (XEXP (x, 1)) != DImode)));
/* NEG: FP sign flip is fchs.  */
14690 if (FLOAT_MODE_P (mode))
14692 *total = COSTS_N_INSNS (ix86_cost->fchs);
/* NOT: double-word on 32-bit DImode.  */
14698 if (!TARGET_64BIT && mode == DImode)
14699 *total = COSTS_N_INSNS (ix86_cost->add * 2);
14701 *total = COSTS_N_INSNS (ix86_cost->add);
/* FLOAT/FIX conversions presumably handled here -- labels missing.  */
14705 if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
14710 if (FLOAT_MODE_P (mode))
14711 *total = COSTS_N_INSNS (ix86_cost->fabs);
14715 if (FLOAT_MODE_P (mode))
14716 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
14724 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
/* Emit SVR3-style constructor-table entry: push the constructor's
   address.  NOTE(review): return type, the `rtx symbol' declaration,
   braces and the closing #endif are missing from this extract.  */
14726 ix86_svr3_asm_out_constructor (symbol, priority)
14728 int priority ATTRIBUTE_UNUSED;
14731 fputs ("\tpushl $", asm_out_file);
14732 assemble_name (asm_out_file, XSTR (symbol, 0));
14733 fputc ('\n', asm_out_file);
/* Monotonic counter used to generate unique local labels (L<n>$lz,
   LPC$<n>) for Mach-O lazy-binding stubs.  */
14739 static int current_machopic_label_num;
14741 /* Given a symbol name and its associated stub, write out the
14742 definition of the stub. */
/* NOTE(review): this section sits under a TARGET_MACHO conditional (see
   the #endif below); the opening #if, the `FILE *file' declaration,
   braces, and the MACHOPIC_PURE/else preprocessor-or-if structure that
   selects PIC vs non-PIC stub flavors are missing from this extract.  */
14745 machopic_output_stub (file, symb, stub)
14747 const char *symb, *stub;
14749 unsigned int length;
14750 char *binder_name, *symbol_name, lazy_ptr_name[32];
14751 int label = ++current_machopic_label_num;
14753 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
14754 symb = (*targetm.strip_name_encoding) (symb);
14756 length = strlen (stub);
14757 binder_name = alloca (length + 32);
14758 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
14760 length = strlen (symb);
14761 symbol_name = alloca (length + 32);
14762 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
14764 sprintf (lazy_ptr_name, "L%d$lz", label);
/* Pick the pic vs non-pic stub section (selection condition missing).  */
14767 machopic_picsymbol_stub_section ();
14769 machopic_symbol_stub_section ();
14771 fprintf (file, "%s:\n", stub);
14772 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
/* PIC flavor: materialize PC in %eax, load the lazy pointer
   PC-relative, and jump through it.  */
14776 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
14777 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
14778 fprintf (file, "\tjmp %%edx\n");
/* Non-PIC flavor: direct indirect jump through the lazy pointer.  */
14781 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
/* Binder: push the lazy pointer's address and tail into dyld's
   stub-binding helper, which resolves the symbol on first use.  */
14783 fprintf (file, "%s:\n", binder_name);
14787 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
14788 fprintf (file, "\tpushl %%eax\n");
14791 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
14793 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
/* Lazy pointer slot, initially pointing at the binder.  */
14795 machopic_lazy_symbol_ptr_section ();
14796 fprintf (file, "%s:\n", lazy_ptr_name);
14797 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14798 fprintf (file, "\t.long %s\n", binder_name);
14800 #endif /* TARGET_MACHO */
14802 /* Order the registers for register allocator. */
/* NOTE(review): return type, braces, and the `int pos = 0, i;' local
   declarations are missing from this extract.  Fills reg_alloc_order[]
   with a preference ordering: call-clobbered GPRs first, then
   call-saved GPRs, then FP/SSE/MMX units depending on TARGET_SSE_MATH.  */
14805 x86_order_regs_for_local_alloc ()
14810 /* First allocate the local general purpose registers. */
14811 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14812 if (GENERAL_REGNO_P (i) && call_used_regs[i])
14813 reg_alloc_order [pos++] = i;
14815 /* Global general purpose registers. */
14816 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14817 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
14818 reg_alloc_order [pos++] = i;
14820 /* x87 registers come first in case we are doing FP math
/* (comment continuation "using them" presumably on the missing line.)  */
14822 if (!TARGET_SSE_MATH)
14823 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14824 reg_alloc_order [pos++] = i;
14826 /* SSE registers. */
14827 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
14828 reg_alloc_order [pos++] = i;
14829 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
14830 reg_alloc_order [pos++] = i;
14832 /* x87 registers. */
14833 if (TARGET_SSE_MATH)
14834 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14835 reg_alloc_order [pos++] = i;
/* MMX registers last among the real units.  */
14837 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
14838 reg_alloc_order [pos++] = i;
14840 /* Initialize the rest of array as we do not allocate some registers
/* (e.g. the PIC and frame registers -- continuation line missing.)  */
14842 while (pos < FIRST_PSEUDO_REGISTER)
14843 reg_alloc_order [pos++] = 0;
/* Default the MS bit-field layout capability off when the target
   headers do not define it.  */
14846 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
14847 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
14850 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
14851 struct attribute_spec.handler. */
/* NOTE(review): return type (tree), the `tree *node, tree name'
   declarations, braces, the `tree *type = NULL' local, the non-decl
   else-branch and the final `return NULL_TREE' are missing from this
   extract.  Only comments added; code byte-identical.  */
14853 ix86_handle_struct_attribute (node, name, args, flags, no_add_attrs)
14856 tree args ATTRIBUTE_UNUSED;
14857 int flags ATTRIBUTE_UNUSED;
14858 bool *no_add_attrs;
/* Locate the type the attribute applies to: a TYPE_DECL's type, or
   (in the missing branch) the node itself when it is a type.  */
14861 if (DECL_P (*node))
14863 if (TREE_CODE (*node) == TYPE_DECL)
14864 type = &TREE_TYPE (*node);
/* The attribute is only meaningful on struct/union types.  */
14869 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
14870 || TREE_CODE (*type) == UNION_TYPE)))
14872 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
14873 *no_add_attrs = true;
/* ms_struct and gcc_struct are mutually exclusive on one type.  */
14876 else if ((is_attribute_p ("ms_struct", name)
14877 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
14878 || ((is_attribute_p ("gcc_struct", name)
14879 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
14881 warning ("`%s' incompatible attribute ignored",
14882 IDENTIFIER_POINTER (name));
14883 *no_add_attrs = true;
/* Return true when RECORD_TYPE should be laid out with MS bit-field
   rules: either the target default is MS layout and the type does not
   opt out via "gcc_struct", or the type opts in via "ms_struct".
   NOTE(review): return type, the `tree record_type' declaration and
   braces are missing from this extract.  */
14890 ix86_ms_bitfield_layout_p (record_type)
14893 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
14894 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
14895 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
14898 /* Returns an expression indicating where the this parameter is
14899 located on entry to the FUNCTION. */
/* NOTE(review): return type (rtx), the `tree function' declaration,
   braces and the TARGET_64BIT conditional guarding the first branch
   are missing from this extract.  */
14902 x86_this_parameter (function)
14905 tree type = TREE_TYPE (function);
/* 64-bit ABI: `this' is the first integer parameter register, or the
   second when a hidden aggregate-return pointer occupies the first.  */
14909 int n = aggregate_value_p (TREE_TYPE (type)) != 0;
14910 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
/* 32-bit regparm functions pass `this' in a register.  */
14913 if (ix86_fntype_regparm (type) > 0)
14917 parm = TYPE_ARG_TYPES (type);
14918 /* Figure out whether or not the function has a variable number of
/* (arguments -- continuation line missing from extract.)  */
14920 for (; parm; parm = TREE_CHAIN (parm))
14921 if (TREE_VALUE (parm) == void_type_node)
14923 /* If not, the this parameter is in %eax. */
14925 return gen_rtx_REG (SImode, 0);
/* Otherwise `this' lives on the stack; skip the return address (4) and
   also the hidden aggregate-return slot (another 4) when present.  */
14928 if (aggregate_value_p (TREE_TYPE (type)))
14929 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
14931 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
14934 /* Determine whether x86_output_mi_thunk can succeed. */
/* NOTE(review): return type (bool), the `tree function' declaration,
   braces and the `return true/false' lines are missing here.  */
14937 x86_can_output_mi_thunk (thunk, delta, vcall_offset, function)
14938 tree thunk ATTRIBUTE_UNUSED;
14939 HOST_WIDE_INT delta ATTRIBUTE_UNUSED;
14940 HOST_WIDE_INT vcall_offset;
14943 /* 64-bit can handle anything. */
14947 /* For 32-bit, everything's fine if we have one free register. */
14948 if (ix86_fntype_regparm (TREE_TYPE (function)) < 3)
14951 /* Need a free register for vcall_offset. */
14955 /* Need a free register for GOT references. */
14956 if (flag_pic && !(*targetm.binds_local_p) (function))
14959 /* Otherwise ok. */
14963 /* Output the assembler code for a thunk function. THUNK_DECL is the
14964 declaration for the thunk function itself, FUNCTION is the decl for
14965 the target function. DELTA is an immediate constant offset to be
14966 added to THIS. If VCALL_OFFSET is nonzero, the word at
14967 *(*this + vcall_offset) should be added to THIS. */
/* NOTE(review): extract heavily incomplete -- return type, `tree
   function' declaration, braces, the xops[]/this_reg/tmp declarations
   and many TARGET_64BIT / TARGET_MACHO conditional lines are missing.
   Code left byte-identical; do not take branch pairings below as
   authoritative without the full source.  */
14970 x86_output_mi_thunk (file, thunk, delta, vcall_offset, function)
14971 FILE *file ATTRIBUTE_UNUSED;
14972 tree thunk ATTRIBUTE_UNUSED;
14973 HOST_WIDE_INT delta;
14974 HOST_WIDE_INT vcall_offset;
14978 rtx this = x86_this_parameter (function);
14981 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
14982 pull it in now and let DELTA benefit. */
14985 else if (vcall_offset)
14987 /* Put the this parameter into %eax. */
14989 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
14990 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
14993 this_reg = NULL_RTX;
14995 /* Adjust the this parameter by a fixed constant. */
14998 xops[0] = GEN_INT (delta);
14999 xops[1] = this_reg ? this_reg : this;
/* 64-bit: DELTA may not fit an immediate; stage it through R10.  */
15002 if (!x86_64_general_operand (xops[0], DImode))
15004 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15006 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15010 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15013 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15016 /* Adjust the this parameter by a value stored in the vtable. */
/* Scratch register: R10 on 64-bit, %ecx on 32-bit.  */
15020 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15022 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
/* Load the vtable pointer (*this) into the scratch.  */
15024 xops[0] = gen_rtx_MEM (Pmode, this_reg);
15027 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15029 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15031 /* Adjust the this parameter. */
15032 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
/* 64-bit: large VCALL_OFFSET needs a second scratch, R11.  */
15033 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15035 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15036 xops[0] = GEN_INT (vcall_offset);
15038 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15039 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
15041 xops[1] = this_reg;
15043 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15045 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15048 /* If necessary, drop THIS back to its stack slot. */
15049 if (this_reg && this_reg != this)
15051 xops[0] = this_reg;
15053 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
/* Tail-jump to the real function: direct when it binds locally,
   through the GOT(PCREL) when PIC, via a Mach-O stub on Darwin.  */
15056 xops[0] = DECL_RTL (function);
15059 if (!flag_pic || (*targetm.binds_local_p) (function))
15060 output_asm_insn ("jmp\t%P0", xops);
15063 tmp = XEXP (xops[0], 0);
15064 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, tmp), UNSPEC_GOTPCREL);
15065 tmp = gen_rtx_CONST (Pmode, tmp);
15066 tmp = gen_rtx_MEM (QImode, tmp);
15068 output_asm_insn ("jmp\t%A0", xops);
15073 if (!flag_pic || (*targetm.binds_local_p) (function))
15074 output_asm_insn ("jmp\t%P0", xops);
15079 char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
15080 tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
15081 tmp = gen_rtx_MEM (QImode, tmp);
15083 output_asm_insn ("jmp\t%0", xops);
15086 #endif /* TARGET_MACHO */
/* Non-Darwin 32-bit PIC: set up the GOT pointer in %ecx, then jump
   indirectly through the function's GOT slot.  */
15088 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15089 output_set_got (tmp);
15092 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15093 output_asm_insn ("jmp\t{*}%1", xops);
/* Cap the alignment of a struct FIELD at 32 bits on 32-bit targets
   without -malign-double, matching the traditional i386 ABI layout of
   double/long long members.  NOTE(review): return type, the `tree
   field' / `int computed' declarations, braces and the plain `return
   computed' fall-through are missing from this extract.  */
15099 x86_field_alignment (field, computed)
15103 enum machine_mode mode;
15104 tree type = TREE_TYPE (field);
/* 64-bit and -malign-double keep the natural alignment (return of
   `computed' on the missing line).  */
15106 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
/* For arrays, the element type determines the alignment rule.  */
15108 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15109 ? get_inner_array_type (type) : type);
15110 if (mode == DFmode || mode == DCmode
15111 || GET_MODE_CLASS (mode) == MODE_INT
15112 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
15113 return MIN (32, computed);
15117 /* Output assembler code to FILE to increment profiler label # LABELNO
15118 for profiling a function entry. */
/* NOTE(review): return type, the `FILE *file' declaration, braces, the
   TARGET_64BIT / flag_pic if-else skeleton and the matching #else/
   #endif lines of the NO_PROFILE_COUNTERS conditionals are missing
   from this extract.  Four flavors are visible below:
   64-bit PIC, 64-bit non-PIC, 32-bit PIC, 32-bit non-PIC.  */
15120 x86_function_profiler (file, labelno)
15122 int labelno ATTRIBUTE_UNUSED;
/* 64-bit PIC: counter address via %rip-relative lea, mcount via GOT.  */
15127 #ifndef NO_PROFILE_COUNTERS
15128 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
15130 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
/* 64-bit non-PIC.  */
15134 #ifndef NO_PROFILE_COUNTERS
15135 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15137 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
/* 32-bit PIC: GOTOFF counter reference, call mcount through the GOT.  */
15141 #ifndef NO_PROFILE_COUNTERS
15142 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15143 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15145 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
/* 32-bit non-PIC.  */
15149 #ifndef NO_PROFILE_COUNTERS
15150 fprintf (file, "\tmovl\t$%sP%d,%%$%s\n", LPREFIX, labelno,
15151 PROFILE_COUNT_REGISTER);
15153 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15157 /* Implement machine specific optimizations.
15158 At the moment we implement single transformation: AMD Athlon works faster
15159 when RET is not destination of conditional jump or directly preceded
15160 by other jump instruction. We avoid the penalty by inserting NOP just
15161 before the RET instructions in such cases. */
/* NOTE(review): return type, braces, the `edge e' declaration, the
   `rtx ret = bb->end' / `rtx prev' locals and several `continue'/
   `insert = true' lines are missing from this extract.  */
15163 x86_machine_dependent_reorg (first)
15164 rtx first ATTRIBUTE_UNUSED;
/* Only relevant for Athlon/K8 tuning, and not worth it at -Os.  */
15168 if (!TARGET_ATHLON_K8 || !optimize || optimize_size)
/* Walk every block that can reach the exit (i.e. ends in a return).  */
15170 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
15172 basic_block bb = e->src;
15175 bool insert = false;
/* Only hot return instructions matter for the penalty.  */
15177 if (!returnjump_p (ret) || !maybe_hot_bb_p (bb))
/* Scan backwards for the nearest active insn or label.  */
15179 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15180 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
/* RET directly behind a label: penalized when any non-fallthru hot
   edge (i.e. a jump) targets that label.  */
15182 if (prev && GET_CODE (prev) == CODE_LABEL)
15185 for (e = bb->pred; e; e = e->pred_next)
15186 if (EDGE_FREQUENCY (e) && e->src->index >= 0
15187 && !(e->flags & EDGE_FALLTHRU))
/* RET directly preceded by a conditional jump is also penalized.  */
15192 prev = prev_active_insn (ret);
15193 if (prev && GET_CODE (prev) == JUMP_INSN
15194 && any_condjump_p (prev))
15196 /* Empty functions get branch misspredict even when the jump destination
15197 is not visible to us. */
15198 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
/* Pad with a NOP so RET is not the jump target / jump successor.  */
15202 emit_insn_before (gen_nop (), ret);
15206 /* Return nonzero when QImode register that must be represented via REX prefix
/* (is used -- comment continuation missing.)  NOTE(review): return
   type, the `rtx insn' declaration, `int i', braces and the final
   return statements are missing from this extract.  Registers >= 4 in
   QImode (SPL/BPL/SIL/DIL and up) require a REX prefix in 64-bit mode.  */
15209 x86_extended_QIreg_mentioned_p (insn)
15213 extract_insn_cached (insn);
15214 for (i = 0; i < recog_data.n_operands; i++)
15215 if (REG_P (recog_data.operand[i])
15216 && REGNO (recog_data.operand[i]) >= 4)
15221 /* Return nonzero when P points to register encoded via REX prefix.
15222 Called via for_each_rtx. */
/* NOTE(review): return type (int), the `rtx *p' declaration, braces
   and the REG_P guard before reading REGNO are missing from this
   extract.  */
15224 extended_reg_mentioned_1 (p, data)
15226 void *data ATTRIBUTE_UNUSED;
15228 unsigned int regno;
15231 regno = REGNO (*p);
15232 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15235 /* Return true when INSN mentions register that must be encoded using REX
/* (prefix -- continuation missing.)  Walks the insn pattern with
   for_each_rtx; nonzero when any REX-only register is referenced.
   NOTE(review): return type, the `rtx insn' declaration and braces are
   missing from this extract.  */
15238 x86_extended_reg_mentioned_p (insn)
15241 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15244 #include "gt-i386.h"