1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003 Free Software Foundation, Inc.
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-attr.h"
43 #include "basic-block.h"
46 #include "target-def.h"
47 #include "langhooks.h"
49 #ifndef CHECK_STACK_LIMIT
50 #define CHECK_STACK_LIMIT (-1)
53 /* Return index of given mode in mult and division cost tables. */
54 #define MODE_INDEX(mode) \
55 ((mode) == QImode ? 0 \
56 : (mode) == HImode ? 1 \
57 : (mode) == SImode ? 2 \
58 : (mode) == DImode ? 3 \
61 /* Processor costs (relative to an add) */
63 struct processor_costs size_cost = { /* costs for tunning for size */
64 2, /* cost of an add instruction */
65 3, /* cost of a lea instruction */
66 2, /* variable shift costs */
67 3, /* constant shift costs */
68 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
69 0, /* cost of multiply per each bit set */
70 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
71 3, /* cost of movsx */
72 3, /* cost of movzx */
75 2, /* cost for loading QImode using movzbl */
76 {2, 2, 2}, /* cost of loading integer registers
77 in QImode, HImode and SImode.
78 Relative to reg-reg move (2). */
79 {2, 2, 2}, /* cost of storing integer registers */
80 2, /* cost of reg,reg fld/fst */
81 {2, 2, 2}, /* cost of loading fp registers
82 in SFmode, DFmode and XFmode */
83 {2, 2, 2}, /* cost of loading integer registers */
84 3, /* cost of moving MMX register */
85 {3, 3}, /* cost of loading MMX registers
86 in SImode and DImode */
87 {3, 3}, /* cost of storing MMX registers
88 in SImode and DImode */
89 3, /* cost of moving SSE register */
90 {3, 3, 3}, /* cost of loading SSE registers
91 in SImode, DImode and TImode */
92 {3, 3, 3}, /* cost of storing SSE registers
93 in SImode, DImode and TImode */
94 3, /* MMX or SSE register to integer */
95 0, /* size of prefetch block */
96 0, /* number of parallel prefetches */
98 2, /* cost of FADD and FSUB insns. */
99 2, /* cost of FMUL instruction. */
100 2, /* cost of FDIV instruction. */
101 2, /* cost of FABS instruction. */
102 2, /* cost of FCHS instruction. */
103 2, /* cost of FSQRT instruction. */
106 /* Processor costs (relative to an add) */
108 struct processor_costs i386_cost = { /* 386 specific costs */
109 1, /* cost of an add instruction */
110 1, /* cost of a lea instruction */
111 3, /* variable shift costs */
112 2, /* constant shift costs */
113 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
114 1, /* cost of multiply per each bit set */
115 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
116 3, /* cost of movsx */
117 2, /* cost of movzx */
118 15, /* "large" insn */
120 4, /* cost for loading QImode using movzbl */
121 {2, 4, 2}, /* cost of loading integer registers
122 in QImode, HImode and SImode.
123 Relative to reg-reg move (2). */
124 {2, 4, 2}, /* cost of storing integer registers */
125 2, /* cost of reg,reg fld/fst */
126 {8, 8, 8}, /* cost of loading fp registers
127 in SFmode, DFmode and XFmode */
128 {8, 8, 8}, /* cost of loading integer registers */
129 2, /* cost of moving MMX register */
130 {4, 8}, /* cost of loading MMX registers
131 in SImode and DImode */
132 {4, 8}, /* cost of storing MMX registers
133 in SImode and DImode */
134 2, /* cost of moving SSE register */
135 {4, 8, 16}, /* cost of loading SSE registers
136 in SImode, DImode and TImode */
137 {4, 8, 16}, /* cost of storing SSE registers
138 in SImode, DImode and TImode */
139 3, /* MMX or SSE register to integer */
140 0, /* size of prefetch block */
141 0, /* number of parallel prefetches */
143 23, /* cost of FADD and FSUB insns. */
144 27, /* cost of FMUL instruction. */
145 88, /* cost of FDIV instruction. */
146 22, /* cost of FABS instruction. */
147 24, /* cost of FCHS instruction. */
148 122, /* cost of FSQRT instruction. */
152 struct processor_costs i486_cost = { /* 486 specific costs */
153 1, /* cost of an add instruction */
154 1, /* cost of a lea instruction */
155 3, /* variable shift costs */
156 2, /* constant shift costs */
157 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
158 1, /* cost of multiply per each bit set */
159 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
160 3, /* cost of movsx */
161 2, /* cost of movzx */
162 15, /* "large" insn */
164 4, /* cost for loading QImode using movzbl */
165 {2, 4, 2}, /* cost of loading integer registers
166 in QImode, HImode and SImode.
167 Relative to reg-reg move (2). */
168 {2, 4, 2}, /* cost of storing integer registers */
169 2, /* cost of reg,reg fld/fst */
170 {8, 8, 8}, /* cost of loading fp registers
171 in SFmode, DFmode and XFmode */
172 {8, 8, 8}, /* cost of loading integer registers */
173 2, /* cost of moving MMX register */
174 {4, 8}, /* cost of loading MMX registers
175 in SImode and DImode */
176 {4, 8}, /* cost of storing MMX registers
177 in SImode and DImode */
178 2, /* cost of moving SSE register */
179 {4, 8, 16}, /* cost of loading SSE registers
180 in SImode, DImode and TImode */
181 {4, 8, 16}, /* cost of storing SSE registers
182 in SImode, DImode and TImode */
183 3, /* MMX or SSE register to integer */
184 0, /* size of prefetch block */
185 0, /* number of parallel prefetches */
187 8, /* cost of FADD and FSUB insns. */
188 16, /* cost of FMUL instruction. */
189 73, /* cost of FDIV instruction. */
190 3, /* cost of FABS instruction. */
191 3, /* cost of FCHS instruction. */
192 83, /* cost of FSQRT instruction. */
196 struct processor_costs pentium_cost = {
197 1, /* cost of an add instruction */
198 1, /* cost of a lea instruction */
199 4, /* variable shift costs */
200 1, /* constant shift costs */
201 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
202 0, /* cost of multiply per each bit set */
203 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
204 3, /* cost of movsx */
205 2, /* cost of movzx */
206 8, /* "large" insn */
208 6, /* cost for loading QImode using movzbl */
209 {2, 4, 2}, /* cost of loading integer registers
210 in QImode, HImode and SImode.
211 Relative to reg-reg move (2). */
212 {2, 4, 2}, /* cost of storing integer registers */
213 2, /* cost of reg,reg fld/fst */
214 {2, 2, 6}, /* cost of loading fp registers
215 in SFmode, DFmode and XFmode */
216 {4, 4, 6}, /* cost of loading integer registers */
217 8, /* cost of moving MMX register */
218 {8, 8}, /* cost of loading MMX registers
219 in SImode and DImode */
220 {8, 8}, /* cost of storing MMX registers
221 in SImode and DImode */
222 2, /* cost of moving SSE register */
223 {4, 8, 16}, /* cost of loading SSE registers
224 in SImode, DImode and TImode */
225 {4, 8, 16}, /* cost of storing SSE registers
226 in SImode, DImode and TImode */
227 3, /* MMX or SSE register to integer */
228 0, /* size of prefetch block */
229 0, /* number of parallel prefetches */
231 3, /* cost of FADD and FSUB insns. */
232 3, /* cost of FMUL instruction. */
233 39, /* cost of FDIV instruction. */
234 1, /* cost of FABS instruction. */
235 1, /* cost of FCHS instruction. */
236 70, /* cost of FSQRT instruction. */
240 struct processor_costs pentiumpro_cost = {
241 1, /* cost of an add instruction */
242 1, /* cost of a lea instruction */
243 1, /* variable shift costs */
244 1, /* constant shift costs */
245 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
246 0, /* cost of multiply per each bit set */
247 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
248 1, /* cost of movsx */
249 1, /* cost of movzx */
250 8, /* "large" insn */
252 2, /* cost for loading QImode using movzbl */
253 {4, 4, 4}, /* cost of loading integer registers
254 in QImode, HImode and SImode.
255 Relative to reg-reg move (2). */
256 {2, 2, 2}, /* cost of storing integer registers */
257 2, /* cost of reg,reg fld/fst */
258 {2, 2, 6}, /* cost of loading fp registers
259 in SFmode, DFmode and XFmode */
260 {4, 4, 6}, /* cost of loading integer registers */
261 2, /* cost of moving MMX register */
262 {2, 2}, /* cost of loading MMX registers
263 in SImode and DImode */
264 {2, 2}, /* cost of storing MMX registers
265 in SImode and DImode */
266 2, /* cost of moving SSE register */
267 {2, 2, 8}, /* cost of loading SSE registers
268 in SImode, DImode and TImode */
269 {2, 2, 8}, /* cost of storing SSE registers
270 in SImode, DImode and TImode */
271 3, /* MMX or SSE register to integer */
272 32, /* size of prefetch block */
273 6, /* number of parallel prefetches */
275 3, /* cost of FADD and FSUB insns. */
276 5, /* cost of FMUL instruction. */
277 56, /* cost of FDIV instruction. */
278 2, /* cost of FABS instruction. */
279 2, /* cost of FCHS instruction. */
280 56, /* cost of FSQRT instruction. */
284 struct processor_costs k6_cost = {
285 1, /* cost of an add instruction */
286 2, /* cost of a lea instruction */
287 1, /* variable shift costs */
288 1, /* constant shift costs */
289 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
290 0, /* cost of multiply per each bit set */
291 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
292 2, /* cost of movsx */
293 2, /* cost of movzx */
294 8, /* "large" insn */
296 3, /* cost for loading QImode using movzbl */
297 {4, 5, 4}, /* cost of loading integer registers
298 in QImode, HImode and SImode.
299 Relative to reg-reg move (2). */
300 {2, 3, 2}, /* cost of storing integer registers */
301 4, /* cost of reg,reg fld/fst */
302 {6, 6, 6}, /* cost of loading fp registers
303 in SFmode, DFmode and XFmode */
304 {4, 4, 4}, /* cost of loading integer registers */
305 2, /* cost of moving MMX register */
306 {2, 2}, /* cost of loading MMX registers
307 in SImode and DImode */
308 {2, 2}, /* cost of storing MMX registers
309 in SImode and DImode */
310 2, /* cost of moving SSE register */
311 {2, 2, 8}, /* cost of loading SSE registers
312 in SImode, DImode and TImode */
313 {2, 2, 8}, /* cost of storing SSE registers
314 in SImode, DImode and TImode */
315 6, /* MMX or SSE register to integer */
316 32, /* size of prefetch block */
317 1, /* number of parallel prefetches */
319 2, /* cost of FADD and FSUB insns. */
320 2, /* cost of FMUL instruction. */
321 56, /* cost of FDIV instruction. */
322 2, /* cost of FABS instruction. */
323 2, /* cost of FCHS instruction. */
324 56, /* cost of FSQRT instruction. */
328 struct processor_costs athlon_cost = {
329 1, /* cost of an add instruction */
330 2, /* cost of a lea instruction */
331 1, /* variable shift costs */
332 1, /* constant shift costs */
333 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
334 0, /* cost of multiply per each bit set */
335 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
336 1, /* cost of movsx */
337 1, /* cost of movzx */
338 8, /* "large" insn */
340 4, /* cost for loading QImode using movzbl */
341 {3, 4, 3}, /* cost of loading integer registers
342 in QImode, HImode and SImode.
343 Relative to reg-reg move (2). */
344 {3, 4, 3}, /* cost of storing integer registers */
345 4, /* cost of reg,reg fld/fst */
346 {4, 4, 12}, /* cost of loading fp registers
347 in SFmode, DFmode and XFmode */
348 {6, 6, 8}, /* cost of loading integer registers */
349 2, /* cost of moving MMX register */
350 {4, 4}, /* cost of loading MMX registers
351 in SImode and DImode */
352 {4, 4}, /* cost of storing MMX registers
353 in SImode and DImode */
354 2, /* cost of moving SSE register */
355 {4, 4, 6}, /* cost of loading SSE registers
356 in SImode, DImode and TImode */
357 {4, 4, 5}, /* cost of storing SSE registers
358 in SImode, DImode and TImode */
359 5, /* MMX or SSE register to integer */
360 64, /* size of prefetch block */
361 6, /* number of parallel prefetches */
363 4, /* cost of FADD and FSUB insns. */
364 4, /* cost of FMUL instruction. */
365 24, /* cost of FDIV instruction. */
366 2, /* cost of FABS instruction. */
367 2, /* cost of FCHS instruction. */
368 35, /* cost of FSQRT instruction. */
372 struct processor_costs k8_cost = {
373 1, /* cost of an add instruction */
374 2, /* cost of a lea instruction */
375 1, /* variable shift costs */
376 1, /* constant shift costs */
377 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
378 0, /* cost of multiply per each bit set */
379 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
380 1, /* cost of movsx */
381 1, /* cost of movzx */
382 8, /* "large" insn */
384 4, /* cost for loading QImode using movzbl */
385 {3, 4, 3}, /* cost of loading integer registers
386 in QImode, HImode and SImode.
387 Relative to reg-reg move (2). */
388 {3, 4, 3}, /* cost of storing integer registers */
389 4, /* cost of reg,reg fld/fst */
390 {4, 4, 12}, /* cost of loading fp registers
391 in SFmode, DFmode and XFmode */
392 {6, 6, 8}, /* cost of loading integer registers */
393 2, /* cost of moving MMX register */
394 {3, 3}, /* cost of loading MMX registers
395 in SImode and DImode */
396 {4, 4}, /* cost of storing MMX registers
397 in SImode and DImode */
398 2, /* cost of moving SSE register */
399 {4, 3, 6}, /* cost of loading SSE registers
400 in SImode, DImode and TImode */
401 {4, 4, 5}, /* cost of storing SSE registers
402 in SImode, DImode and TImode */
403 5, /* MMX or SSE register to integer */
404 64, /* size of prefetch block */
405 6, /* number of parallel prefetches */
407 4, /* cost of FADD and FSUB insns. */
408 4, /* cost of FMUL instruction. */
409 19, /* cost of FDIV instruction. */
410 2, /* cost of FABS instruction. */
411 2, /* cost of FCHS instruction. */
412 35, /* cost of FSQRT instruction. */
416 struct processor_costs pentium4_cost = {
417 1, /* cost of an add instruction */
418 1, /* cost of a lea instruction */
419 4, /* variable shift costs */
420 4, /* constant shift costs */
421 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
422 0, /* cost of multiply per each bit set */
423 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
424 1, /* cost of movsx */
425 1, /* cost of movzx */
426 16, /* "large" insn */
428 2, /* cost for loading QImode using movzbl */
429 {4, 5, 4}, /* cost of loading integer registers
430 in QImode, HImode and SImode.
431 Relative to reg-reg move (2). */
432 {2, 3, 2}, /* cost of storing integer registers */
433 2, /* cost of reg,reg fld/fst */
434 {2, 2, 6}, /* cost of loading fp registers
435 in SFmode, DFmode and XFmode */
436 {4, 4, 6}, /* cost of loading integer registers */
437 2, /* cost of moving MMX register */
438 {2, 2}, /* cost of loading MMX registers
439 in SImode and DImode */
440 {2, 2}, /* cost of storing MMX registers
441 in SImode and DImode */
442 12, /* cost of moving SSE register */
443 {12, 12, 12}, /* cost of loading SSE registers
444 in SImode, DImode and TImode */
445 {2, 2, 8}, /* cost of storing SSE registers
446 in SImode, DImode and TImode */
447 10, /* MMX or SSE register to integer */
448 64, /* size of prefetch block */
449 6, /* number of parallel prefetches */
451 5, /* cost of FADD and FSUB insns. */
452 7, /* cost of FMUL instruction. */
453 43, /* cost of FDIV instruction. */
454 2, /* cost of FABS instruction. */
455 2, /* cost of FCHS instruction. */
456 43, /* cost of FSQRT instruction. */
459 const struct processor_costs *ix86_cost = &pentium_cost;
461 /* Processor feature/optimization bitmasks. */
462 #define m_386 (1<<PROCESSOR_I386)
463 #define m_486 (1<<PROCESSOR_I486)
464 #define m_PENT (1<<PROCESSOR_PENTIUM)
465 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
466 #define m_K6 (1<<PROCESSOR_K6)
467 #define m_ATHLON (1<<PROCESSOR_ATHLON)
468 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
469 #define m_K8 (1<<PROCESSOR_K8)
470 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
472 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
473 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
474 const int x86_zero_extend_with_and = m_486 | m_PENT;
475 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
476 const int x86_double_with_add = ~m_386;
477 const int x86_use_bit_test = m_386;
478 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
479 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
480 const int x86_3dnow_a = m_ATHLON_K8;
481 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
482 const int x86_branch_hints = m_PENT4;
483 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
484 const int x86_partial_reg_stall = m_PPRO;
485 const int x86_use_loop = m_K6;
486 const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
487 const int x86_use_mov0 = m_K6;
488 const int x86_use_cltd = ~(m_PENT | m_K6);
489 const int x86_read_modify_write = ~m_PENT;
490 const int x86_read_modify = ~(m_PENT | m_PPRO);
491 const int x86_split_long_moves = m_PPRO;
492 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
493 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
494 const int x86_single_stringop = m_386 | m_PENT4;
495 const int x86_qimode_math = ~(0);
496 const int x86_promote_qi_regs = 0;
497 const int x86_himode_math = ~(m_PPRO);
498 const int x86_promote_hi_regs = m_PPRO;
499 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
500 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
501 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
502 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
503 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
504 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
505 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
506 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
507 const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
508 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
509 const int x86_decompose_lea = m_PENT4;
510 const int x86_shift1 = ~m_486;
511 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
512 const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
513 /* Set for machines where the type and dependencies are resolved on SSE register
514 parts instead of whole registers, so we may maintain just lower part of
515 scalar values in proper format leaving the upper part undefined. */
516 const int x86_sse_partial_regs = m_ATHLON_K8;
517 /* Athlon optimizes partial-register FPS special case, thus avoiding the
518 need for extra instructions beforehand */
519 const int x86_sse_partial_regs_for_cvtsd2ss = 0;
520 const int x86_sse_typeless_stores = m_ATHLON_K8;
521 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
522 const int x86_use_ffreep = m_ATHLON_K8;
523 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
525 /* In case the average insn count for single function invocation is
526 lower than this constant, emit fast (but longer) prologue and
528 #define FAST_PROLOGUE_INSN_COUNT 20
530 /* Set by prologue expander and used by epilogue expander to determine
532 static int use_fast_prologue_epilogue;
534 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
535 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
536 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
537 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
539 /* Array of the smallest class containing reg number REGNO, indexed by
540 REGNO. Used by REGNO_REG_CLASS in i386.h. */
542 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
545 AREG, DREG, CREG, BREG,
547 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
549 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
550 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
553 /* flags, fpsr, dirflag, frame */
554 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
555 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
557 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
559 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
560 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
561 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
565 /* The "default" register map used in 32bit mode. */
567 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
569 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
570 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
571 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
572 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
573 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
574 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
575 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
578 static int const x86_64_int_parameter_registers[6] =
580 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
581 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
584 static int const x86_64_int_return_registers[4] =
586 0 /*RAX*/, 1 /*RDI*/, 5 /*RDI*/, 4 /*RSI*/
589 /* The "default" register map used in 64bit mode. */
590 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
592 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
593 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
594 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
595 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
596 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
597 8,9,10,11,12,13,14,15, /* extended integer registers */
598 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
601 /* Define the register numbers to be used in Dwarf debugging information.
602 The SVR4 reference port C compiler uses the following register numbers
603 in its Dwarf output code:
604 0 for %eax (gcc regno = 0)
605 1 for %ecx (gcc regno = 2)
606 2 for %edx (gcc regno = 1)
607 3 for %ebx (gcc regno = 3)
608 4 for %esp (gcc regno = 7)
609 5 for %ebp (gcc regno = 6)
610 6 for %esi (gcc regno = 4)
611 7 for %edi (gcc regno = 5)
612 The following three DWARF register numbers are never generated by
613 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
614 believes these numbers have these meanings.
615 8 for %eip (no gcc equivalent)
616 9 for %eflags (gcc regno = 17)
617 10 for %trapno (no gcc equivalent)
618 It is not at all clear how we should number the FP stack registers
619 for the x86 architecture. If the version of SDB on x86/svr4 were
620 a bit less brain dead with respect to floating-point then we would
621 have a precedent to follow with respect to DWARF register numbers
622 for x86 FP registers, but the SDB on x86/svr4 is so completely
623 broken with respect to FP registers that it is hardly worth thinking
624 of it as something to strive for compatibility with.
625 The version of x86/svr4 SDB I have at the moment does (partially)
626 seem to believe that DWARF register number 11 is associated with
627 the x86 register %st(0), but that's about all. Higher DWARF
628 register numbers don't seem to be associated with anything in
629 particular, and even for DWARF regno 11, SDB only seems to under-
630 stand that it should say that a variable lives in %st(0) (when
631 asked via an `=' command) if we said it was in DWARF regno 11,
632 but SDB still prints garbage when asked for the value of the
633 variable in question (via a `/' command).
634 (Also note that the labels SDB prints for various FP stack regs
635 when doing an `x' command are all wrong.)
636 Note that these problems generally don't affect the native SVR4
637 C compiler because it doesn't allow the use of -O with -g and
638 because when it is *not* optimizing, it allocates a memory
639 location for each floating-point variable, and the memory
640 location is what gets described in the DWARF AT_location
641 attribute for the variable in question.
642 Regardless of the severe mental illness of the x86/svr4 SDB, we
643 do something sensible here and we use the following DWARF
644 register numbers. Note that these are all stack-top-relative
646 11 for %st(0) (gcc regno = 8)
647 12 for %st(1) (gcc regno = 9)
648 13 for %st(2) (gcc regno = 10)
649 14 for %st(3) (gcc regno = 11)
650 15 for %st(4) (gcc regno = 12)
651 16 for %st(5) (gcc regno = 13)
652 17 for %st(6) (gcc regno = 14)
653 18 for %st(7) (gcc regno = 15)
655 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
657 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
658 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
659 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
660 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
661 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
662 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
663 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
666 /* Test and compare insns in i386.md store the information needed to
667 generate branch and scc insns here. */
669 rtx ix86_compare_op0 = NULL_RTX;
670 rtx ix86_compare_op1 = NULL_RTX;
672 /* The encoding characters for the four TLS models present in ELF. */
674 static char const tls_model_chars[] = " GLil";
676 #define MAX_386_STACK_LOCALS 3
677 /* Size of the register save area. */
678 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
680 /* Define the structure for the machine field in struct function. */
681 struct machine_function GTY(())
683 rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
684 const char *some_ld_name;
685 int save_varrargs_registers;
686 int accesses_prev_frame;
689 #define ix86_stack_locals (cfun->machine->stack_locals)
690 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
692 /* Structure describing stack frame layout.
693 Stack grows downward:
699 saved frame pointer if frame_pointer_needed
700 <- HARD_FRAME_POINTER
706 > to_allocate <- FRAME_POINTER
718 int outgoing_arguments_size;
721 HOST_WIDE_INT to_allocate;
722 /* The offsets relative to ARG_POINTER. */
723 HOST_WIDE_INT frame_pointer_offset;
724 HOST_WIDE_INT hard_frame_pointer_offset;
725 HOST_WIDE_INT stack_pointer_offset;
728 /* Used to enable/disable debugging features. */
729 const char *ix86_debug_arg_string, *ix86_debug_addr_string;
730 /* Code model option as passed by user. */
731 const char *ix86_cmodel_string;
733 enum cmodel ix86_cmodel;
735 const char *ix86_asm_string;
736 enum asm_dialect ix86_asm_dialect = ASM_ATT;
738 const char *ix86_tls_dialect_string;
739 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
741 /* Which unit we are generating floating point math for. */
742 enum fpmath_unit ix86_fpmath;
744 /* Which cpu are we scheduling for. */
745 enum processor_type ix86_cpu;
746 /* Which instruction set architecture to use. */
747 enum processor_type ix86_arch;
749 /* Strings to hold which cpu and instruction set architecture to use. */
750 const char *ix86_cpu_string; /* for -mcpu=<xxx> */
751 const char *ix86_arch_string; /* for -march=<xxx> */
752 const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
754 /* # of registers to use to pass arguments. */
755 const char *ix86_regparm_string;
757 /* true if sse prefetch instruction is not NOOP. */
758 int x86_prefetch_sse;
760 /* ix86_regparm_string as a number */
763 /* Alignment to use for loops and jumps: */
765 /* Power of two alignment for loops. */
766 const char *ix86_align_loops_string;
768 /* Power of two alignment for non-loop jumps. */
769 const char *ix86_align_jumps_string;
771 /* Power of two alignment for stack boundary in bytes. */
772 const char *ix86_preferred_stack_boundary_string;
774 /* Preferred alignment for stack boundary in bits. */
775 int ix86_preferred_stack_boundary;
777 /* Values 1-5: see jump.c */
778 int ix86_branch_cost;
779 const char *ix86_branch_cost_string;
781 /* Power of two alignment for functions. */
782 const char *ix86_align_funcs_string;
784 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
785 static char internal_label_prefix[16];
786 static int internal_label_prefix_len;
788 static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
789 static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
790 static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
791 static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
793 static const char *get_some_local_dynamic_name PARAMS ((void));
794 static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
795 static rtx maybe_get_pool_constant PARAMS ((rtx));
796 static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
797 static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
799 static rtx get_thread_pointer PARAMS ((void));
800 static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
801 static rtx gen_push PARAMS ((rtx));
802 static int memory_address_length PARAMS ((rtx addr));
803 static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
804 static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
805 static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
806 static void ix86_dump_ppro_packet PARAMS ((FILE *));
807 static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
808 static struct machine_function * ix86_init_machine_status PARAMS ((void));
809 static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
810 static int ix86_nsaved_regs PARAMS ((void));
811 static void ix86_emit_save_regs PARAMS ((void));
812 static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
813 static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
814 static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
815 static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
816 static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
817 static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
818 static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
819 static rtx ix86_expand_aligntest PARAMS ((rtx, int));
820 static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
821 static int ix86_issue_rate PARAMS ((void));
822 static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
823 static void ix86_sched_init PARAMS ((FILE *, int, int));
824 static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
825 static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
826 static int ia32_use_dfa_pipeline_interface PARAMS ((void));
827 static int ia32_multipass_dfa_lookahead PARAMS ((void));
828 static void ix86_init_mmx_sse_builtins PARAMS ((void));
829 static rtx x86_this_parameter PARAMS ((tree));
830 static void x86_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
831 HOST_WIDE_INT, tree));
832 static bool x86_can_output_mi_thunk PARAMS ((tree, HOST_WIDE_INT,
833 HOST_WIDE_INT, tree));
834 bool ix86_expand_carry_flag_compare PARAMS ((enum rtx_code, rtx, rtx, rtx*));
838 rtx base, index, disp;
842 static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
843 static bool ix86_cannot_force_const_mem PARAMS ((rtx));
845 static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
846 static const char *ix86_strip_name_encoding PARAMS ((const char *))
849 struct builtin_description;
850 static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
852 static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
854 static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
855 static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
856 static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
857 static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
858 static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
859 static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
860 static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
864 static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
866 static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
867 static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
868 static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
869 static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
870 static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
871 static int ix86_save_reg PARAMS ((unsigned int, int));
872 static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
873 static int ix86_comp_type_attributes PARAMS ((tree, tree));
874 static int ix86_fntype_regparm PARAMS ((tree));
875 const struct attribute_spec ix86_attribute_table[];
876 static bool ix86_function_ok_for_sibcall PARAMS ((tree, tree));
877 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
878 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
879 static int ix86_value_regno PARAMS ((enum machine_mode));
880 static bool ix86_ms_bitfield_layout_p PARAMS ((tree));
881 static tree ix86_handle_struct_attribute PARAMS ((tree *, tree, tree, int, bool *));
882 static int extended_reg_mentioned_1 PARAMS ((rtx *, void *));
883 static bool ix86_rtx_costs PARAMS ((rtx, int, int, int *));
885 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
886 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
889 /* Register class used for passing given 64bit part of the argument.
890 These represent classes as documented by the PS ABI, with the exception
891 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
892 use SF or DFmode move instead of DImode to avoid reformatting penalties.
894 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
895 whenever possible (upper half does contain padding).
897 enum x86_64_reg_class
900 X86_64_INTEGER_CLASS,
901 X86_64_INTEGERSI_CLASS,
910 static const char * const x86_64_reg_class_name[] =
911 {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
913 #define MAX_CLASSES 4
914 static int classify_argument PARAMS ((enum machine_mode, tree,
915 enum x86_64_reg_class [MAX_CLASSES],
917 static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
919 static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
921 static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
922 enum x86_64_reg_class));
924 /* Initialize the GCC target structure. */
925 #undef TARGET_ATTRIBUTE_TABLE
926 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
927 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
928 # undef TARGET_MERGE_DECL_ATTRIBUTES
929 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
932 #undef TARGET_COMP_TYPE_ATTRIBUTES
933 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
935 #undef TARGET_INIT_BUILTINS
936 #define TARGET_INIT_BUILTINS ix86_init_builtins
938 #undef TARGET_EXPAND_BUILTIN
939 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
941 #undef TARGET_ASM_FUNCTION_EPILOGUE
942 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
944 #undef TARGET_ASM_OPEN_PAREN
945 #define TARGET_ASM_OPEN_PAREN ""
946 #undef TARGET_ASM_CLOSE_PAREN
947 #define TARGET_ASM_CLOSE_PAREN ""
949 #undef TARGET_ASM_ALIGNED_HI_OP
950 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
951 #undef TARGET_ASM_ALIGNED_SI_OP
952 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
954 #undef TARGET_ASM_ALIGNED_DI_OP
955 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
958 #undef TARGET_ASM_UNALIGNED_HI_OP
959 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
960 #undef TARGET_ASM_UNALIGNED_SI_OP
961 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
962 #undef TARGET_ASM_UNALIGNED_DI_OP
963 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
965 #undef TARGET_SCHED_ADJUST_COST
966 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
967 #undef TARGET_SCHED_ISSUE_RATE
968 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
969 #undef TARGET_SCHED_VARIABLE_ISSUE
970 #define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
971 #undef TARGET_SCHED_INIT
972 #define TARGET_SCHED_INIT ix86_sched_init
973 #undef TARGET_SCHED_REORDER
974 #define TARGET_SCHED_REORDER ix86_sched_reorder
975 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
976 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
977 ia32_use_dfa_pipeline_interface
978 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
979 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
980 ia32_multipass_dfa_lookahead
982 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
983 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
986 #undef TARGET_HAVE_TLS
987 #define TARGET_HAVE_TLS true
989 #undef TARGET_CANNOT_FORCE_CONST_MEM
990 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
992 #undef TARGET_MS_BITFIELD_LAYOUT_P
993 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
995 #undef TARGET_ASM_OUTPUT_MI_THUNK
996 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
997 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
998 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1000 #undef TARGET_RTX_COSTS
1001 #define TARGET_RTX_COSTS ix86_rtx_costs
1003 struct gcc_target targetm = TARGET_INITIALIZER;
1005 /* Sometimes certain combinations of command options do not make
1006 sense on a particular target machine. You can define a macro
1007 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1008 defined, is executed once just after all the command options have
1011 Don't use this macro to turn on various extra optimizations for
1012 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1018 /* Comes from final.c -- no real reason to change it. */
1019 #define MAX_CODE_ALIGN 16
1023 const struct processor_costs *cost; /* Processor costs */
1024 const int target_enable; /* Target flags to enable. */
1025 const int target_disable; /* Target flags to disable. */
1026 const int align_loop; /* Default alignments. */
1027 const int align_loop_max_skip;
1028 const int align_jump;
1029 const int align_jump_max_skip;
1030 const int align_func;
1032 const processor_target_table[PROCESSOR_max] =
1034 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1035 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1036 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1037 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1038 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1039 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1040 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1041 {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
1044 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1047 const char *const name; /* processor name or nickname. */
1048 const enum processor_type processor;
1049 const enum pta_flags
1054 PTA_PREFETCH_SSE = 8,
1060 const processor_alias_table[] =
1062 {"i386", PROCESSOR_I386, 0},
1063 {"i486", PROCESSOR_I486, 0},
1064 {"i586", PROCESSOR_PENTIUM, 0},
1065 {"pentium", PROCESSOR_PENTIUM, 0},
1066 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1067 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1068 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1069 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1070 {"i686", PROCESSOR_PENTIUMPRO, 0},
1071 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1072 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1073 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1074 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
1075 PTA_MMX | PTA_PREFETCH_SSE},
1076 {"k6", PROCESSOR_K6, PTA_MMX},
1077 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1078 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1079 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1081 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1082 | PTA_3DNOW | PTA_3DNOW_A},
1083 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1084 | PTA_3DNOW_A | PTA_SSE},
1085 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1086 | PTA_3DNOW_A | PTA_SSE},
1087 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1088 | PTA_3DNOW_A | PTA_SSE},
1089 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1090 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1093 int const pta_size = ARRAY_SIZE (processor_alias_table);
1095 /* By default our XFmode is the 80-bit extended format. If we have
1096 use TFmode instead, it's also the 80-bit format, but with padding. */
1097 real_format_for_mode[XFmode - QFmode] = &ieee_extended_intel_96_format;
1098 real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
1100 /* Set the default values for switches whose default depends on TARGET_64BIT
1101 in case they weren't overwritten by command line options. */
1104 if (flag_omit_frame_pointer == 2)
1105 flag_omit_frame_pointer = 1;
1106 if (flag_asynchronous_unwind_tables == 2)
1107 flag_asynchronous_unwind_tables = 1;
1108 if (flag_pcc_struct_return == 2)
1109 flag_pcc_struct_return = 0;
1113 if (flag_omit_frame_pointer == 2)
1114 flag_omit_frame_pointer = 0;
1115 if (flag_asynchronous_unwind_tables == 2)
1116 flag_asynchronous_unwind_tables = 0;
1117 if (flag_pcc_struct_return == 2)
1118 flag_pcc_struct_return = 1;
1121 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1122 SUBTARGET_OVERRIDE_OPTIONS;
1125 if (!ix86_cpu_string && ix86_arch_string)
1126 ix86_cpu_string = ix86_arch_string;
1127 if (!ix86_cpu_string)
1128 ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
1129 if (!ix86_arch_string)
1130 ix86_arch_string = TARGET_64BIT ? "k8" : "i386";
1132 if (ix86_cmodel_string != 0)
1134 if (!strcmp (ix86_cmodel_string, "small"))
1135 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1137 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1138 else if (!strcmp (ix86_cmodel_string, "32"))
1139 ix86_cmodel = CM_32;
1140 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1141 ix86_cmodel = CM_KERNEL;
1142 else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1143 ix86_cmodel = CM_MEDIUM;
1144 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1145 ix86_cmodel = CM_LARGE;
1147 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1151 ix86_cmodel = CM_32;
1153 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1155 if (ix86_asm_string != 0)
1157 if (!strcmp (ix86_asm_string, "intel"))
1158 ix86_asm_dialect = ASM_INTEL;
1159 else if (!strcmp (ix86_asm_string, "att"))
1160 ix86_asm_dialect = ASM_ATT;
1162 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1164 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1165 error ("code model `%s' not supported in the %s bit mode",
1166 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1167 if (ix86_cmodel == CM_LARGE)
1168 sorry ("code model `large' not supported yet");
1169 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1170 sorry ("%i-bit mode not compiled in",
1171 (target_flags & MASK_64BIT) ? 64 : 32);
1173 for (i = 0; i < pta_size; i++)
1174 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1176 ix86_arch = processor_alias_table[i].processor;
1177 /* Default cpu tuning to the architecture. */
1178 ix86_cpu = ix86_arch;
1179 if (processor_alias_table[i].flags & PTA_MMX
1180 && !(target_flags_explicit & MASK_MMX))
1181 target_flags |= MASK_MMX;
1182 if (processor_alias_table[i].flags & PTA_3DNOW
1183 && !(target_flags_explicit & MASK_3DNOW))
1184 target_flags |= MASK_3DNOW;
1185 if (processor_alias_table[i].flags & PTA_3DNOW_A
1186 && !(target_flags_explicit & MASK_3DNOW_A))
1187 target_flags |= MASK_3DNOW_A;
1188 if (processor_alias_table[i].flags & PTA_SSE
1189 && !(target_flags_explicit & MASK_SSE))
1190 target_flags |= MASK_SSE;
1191 if (processor_alias_table[i].flags & PTA_SSE2
1192 && !(target_flags_explicit & MASK_SSE2))
1193 target_flags |= MASK_SSE2;
1194 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1195 x86_prefetch_sse = true;
1196 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1197 error ("CPU you selected does not support x86-64 instruction set");
1202 error ("bad value (%s) for -march= switch", ix86_arch_string);
1204 for (i = 0; i < pta_size; i++)
1205 if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
1207 ix86_cpu = processor_alias_table[i].processor;
1208 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1209 error ("CPU you selected does not support x86-64 instruction set");
1212 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1213 x86_prefetch_sse = true;
1215 error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
1218 ix86_cost = &size_cost;
1220 ix86_cost = processor_target_table[ix86_cpu].cost;
1221 target_flags |= processor_target_table[ix86_cpu].target_enable;
1222 target_flags &= ~processor_target_table[ix86_cpu].target_disable;
1224 /* Arrange to set up i386_stack_locals for all functions. */
1225 init_machine_status = ix86_init_machine_status;
1227 /* Validate -mregparm= value. */
1228 if (ix86_regparm_string)
1230 i = atoi (ix86_regparm_string);
1231 if (i < 0 || i > REGPARM_MAX)
1232 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1238 ix86_regparm = REGPARM_MAX;
1240 /* If the user has provided any of the -malign-* options,
1241 warn and use that value only if -falign-* is not set.
1242 Remove this code in GCC 3.2 or later. */
1243 if (ix86_align_loops_string)
1245 warning ("-malign-loops is obsolete, use -falign-loops");
1246 if (align_loops == 0)
1248 i = atoi (ix86_align_loops_string);
1249 if (i < 0 || i > MAX_CODE_ALIGN)
1250 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1252 align_loops = 1 << i;
1256 if (ix86_align_jumps_string)
1258 warning ("-malign-jumps is obsolete, use -falign-jumps");
1259 if (align_jumps == 0)
1261 i = atoi (ix86_align_jumps_string);
1262 if (i < 0 || i > MAX_CODE_ALIGN)
1263 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1265 align_jumps = 1 << i;
1269 if (ix86_align_funcs_string)
1271 warning ("-malign-functions is obsolete, use -falign-functions");
1272 if (align_functions == 0)
1274 i = atoi (ix86_align_funcs_string);
1275 if (i < 0 || i > MAX_CODE_ALIGN)
1276 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1278 align_functions = 1 << i;
1282 /* Default align_* from the processor table. */
1283 if (align_loops == 0)
1285 align_loops = processor_target_table[ix86_cpu].align_loop;
1286 align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
1288 if (align_jumps == 0)
1290 align_jumps = processor_target_table[ix86_cpu].align_jump;
1291 align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
1293 if (align_functions == 0)
1295 align_functions = processor_target_table[ix86_cpu].align_func;
1298 /* Validate -mpreferred-stack-boundary= value, or provide default.
1299 The default of 128 bits is for Pentium III's SSE __m128, but we
1300 don't want additional code to keep the stack aligned when
1301 optimizing for code size. */
1302 ix86_preferred_stack_boundary = (optimize_size
1303 ? TARGET_64BIT ? 128 : 32
1305 if (ix86_preferred_stack_boundary_string)
1307 i = atoi (ix86_preferred_stack_boundary_string);
1308 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1309 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1310 TARGET_64BIT ? 4 : 2);
1312 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1315 /* Validate -mbranch-cost= value, or provide default. */
1316 ix86_branch_cost = processor_target_table[ix86_cpu].cost->branch_cost;
1317 if (ix86_branch_cost_string)
1319 i = atoi (ix86_branch_cost_string);
1321 error ("-mbranch-cost=%d is not between 0 and 5", i);
1323 ix86_branch_cost = i;
1326 if (ix86_tls_dialect_string)
1328 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1329 ix86_tls_dialect = TLS_DIALECT_GNU;
1330 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1331 ix86_tls_dialect = TLS_DIALECT_SUN;
1333 error ("bad value (%s) for -mtls-dialect= switch",
1334 ix86_tls_dialect_string);
1337 /* Keep nonleaf frame pointers. */
1338 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1339 flag_omit_frame_pointer = 1;
1341 /* If we're doing fast math, we don't care about comparison order
1342 wrt NaNs. This lets us use a shorter comparison sequence. */
1343 if (flag_unsafe_math_optimizations)
1344 target_flags &= ~MASK_IEEE_FP;
1346 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1347 since the insns won't need emulation. */
1348 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1349 target_flags &= ~MASK_NO_FANCY_MATH_387;
1353 if (TARGET_ALIGN_DOUBLE)
1354 error ("-malign-double makes no sense in the 64bit mode");
1356 error ("-mrtd calling convention not supported in the 64bit mode");
1357 /* Enable by default the SSE and MMX builtins. */
1358 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1359 ix86_fpmath = FPMATH_SSE;
1362 ix86_fpmath = FPMATH_387;
1364 if (ix86_fpmath_string != 0)
1366 if (! strcmp (ix86_fpmath_string, "387"))
1367 ix86_fpmath = FPMATH_387;
1368 else if (! strcmp (ix86_fpmath_string, "sse"))
1372 warning ("SSE instruction set disabled, using 387 arithmetics");
1373 ix86_fpmath = FPMATH_387;
1376 ix86_fpmath = FPMATH_SSE;
1378 else if (! strcmp (ix86_fpmath_string, "387,sse")
1379 || ! strcmp (ix86_fpmath_string, "sse,387"))
1383 warning ("SSE instruction set disabled, using 387 arithmetics");
1384 ix86_fpmath = FPMATH_387;
1386 else if (!TARGET_80387)
1388 warning ("387 instruction set disabled, using SSE arithmetics");
1389 ix86_fpmath = FPMATH_SSE;
1392 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1395 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1398 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1402 target_flags |= MASK_MMX;
1403 x86_prefetch_sse = true;
1406 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1409 target_flags |= MASK_MMX;
1410 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1411 extensions it adds. */
1412 if (x86_3dnow_a & (1 << ix86_arch))
1413 target_flags |= MASK_3DNOW_A;
1415 if ((x86_accumulate_outgoing_args & CPUMASK)
1416 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1418 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1420 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1423 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1424 p = strchr (internal_label_prefix, 'X');
1425 internal_label_prefix_len = p - internal_label_prefix;
1431 optimization_options (level, size)
1433 int size ATTRIBUTE_UNUSED;
1435 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1436 make the problem with not enough registers even worse. */
1437 #ifdef INSN_SCHEDULING
1439 flag_schedule_insns = 0;
1442 /* The default values of these switches depend on the TARGET_64BIT
1443 that is not known at this moment. Mark these values with 2 and
1444 let user the to override these. In case there is no command line option
1445 specifying them, we will set the defaults in override_options. */
1447 flag_omit_frame_pointer = 2;
1448 flag_pcc_struct_return = 2;
1449 flag_asynchronous_unwind_tables = 2;
1452 /* Table of valid machine attributes. */
1453 const struct attribute_spec ix86_attribute_table[] =
1455 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1456 /* Stdcall attribute says callee is responsible for popping arguments
1457 if they are not variable. */
1458 { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1459 /* Fastcall attribute says callee is responsible for popping arguments
1460 if they are not variable. */
1461 { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1462 /* Cdecl attribute says the callee is a normal C declaration */
1463 { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
1464 /* Regparm attribute specifies how many integer arguments are to be
1465 passed in registers. */
1466 { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
1467 #ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1468 { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1469 { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1470 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1472 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1473 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1474 { NULL, 0, 0, false, false, false, NULL }
1477 /* If PIC, we cannot make sibling calls to global functions
1478 because the PLT requires %ebx live.
1479 If we are returning floats on the register stack, we cannot make
1480 sibling calls to functions that return floats. (The stack adjust
1481 instruction will wind up after the sibcall jump, and not be executed.) */
1484 ix86_function_ok_for_sibcall (decl, exp)
1488 /* If we are generating position-independent code, we cannot sibcall
1489 optimize any indirect call, or a direct call to a global function,
1490 as the PLT requires %ebx be live. */
1491 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1494 /* If we are returning floats on the 80387 register stack, we cannot
1495 make a sibcall from a function that doesn't return a float to a
1496 function that does; the necessary stack adjustment will not be
1498 if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1499 && ! STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1502 /* If this call is indirect, we'll need to be able to use a call-clobbered
1503 register for the address of the target function. Make sure that all
1504 such registers are not used for passing parameters. */
1505 if (!decl && !TARGET_64BIT)
1507 int regparm = ix86_regparm;
1510 /* We're looking at the CALL_EXPR, we need the type of the function. */
1511 type = TREE_OPERAND (exp, 0); /* pointer expression */
1512 type = TREE_TYPE (type); /* pointer type */
1513 type = TREE_TYPE (type); /* function type */
1515 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1517 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1521 /* ??? Need to count the actual number of registers to be used,
1522 not the possible number of registers. Fix later. */
1527 /* Otherwise okay. That also includes certain types of indirect calls. */
1531 /* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1532 arguments as in struct attribute_spec.handler. */
1534 ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
1537 tree args ATTRIBUTE_UNUSED;
1538 int flags ATTRIBUTE_UNUSED;
1541 if (TREE_CODE (*node) != FUNCTION_TYPE
1542 && TREE_CODE (*node) != METHOD_TYPE
1543 && TREE_CODE (*node) != FIELD_DECL
1544 && TREE_CODE (*node) != TYPE_DECL)
1546 warning ("`%s' attribute only applies to functions",
1547 IDENTIFIER_POINTER (name));
1548 *no_add_attrs = true;
1552 if (is_attribute_p ("fastcall", name))
1554 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1556 error ("fastcall and stdcall attributes are not compatible");
1558 else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1560 error ("fastcall and regparm attributes are not compatible");
1563 else if (is_attribute_p ("stdcall", name))
1565 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1567 error ("fastcall and stdcall attributes are not compatible");
1574 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1575 *no_add_attrs = true;
1581 /* Handle a "regparm" attribute;
1582 arguments as in struct attribute_spec.handler. */
1584 ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1588 int flags ATTRIBUTE_UNUSED;
1591 if (TREE_CODE (*node) != FUNCTION_TYPE
1592 && TREE_CODE (*node) != METHOD_TYPE
1593 && TREE_CODE (*node) != FIELD_DECL
1594 && TREE_CODE (*node) != TYPE_DECL)
1596 warning ("`%s' attribute only applies to functions",
1597 IDENTIFIER_POINTER (name));
1598 *no_add_attrs = true;
1604 cst = TREE_VALUE (args);
1605 if (TREE_CODE (cst) != INTEGER_CST)
1607 warning ("`%s' attribute requires an integer constant argument",
1608 IDENTIFIER_POINTER (name));
1609 *no_add_attrs = true;
1611 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1613 warning ("argument to `%s' attribute larger than %d",
1614 IDENTIFIER_POINTER (name), REGPARM_MAX);
1615 *no_add_attrs = true;
1618 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1620 error ("fastcall and regparm attributes are not compatible");
1627 /* Return 0 if the attributes for two types are incompatible, 1 if they
1628 are compatible, and 2 if they are nearly compatible (which causes a
1629 warning to be generated). */
1632 ix86_comp_type_attributes (type1, type2)
1636 /* Check for mismatch of non-default calling convention. */
1637 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1639 if (TREE_CODE (type1) != FUNCTION_TYPE)
1642 /* Check for mismatched fastcall types */
1643 if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1644 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1647 /* Check for mismatched return types (cdecl vs stdcall). */
1648 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1649 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1654 /* Return the regparm value for a fuctio with the indicated TYPE. */
1657 ix86_fntype_regparm (type)
1662 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1664 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1666 return ix86_regparm;
1669 /* Value is the number of bytes of arguments automatically
1670 popped when returning from a subroutine call.
1671 FUNDECL is the declaration node of the function (as a tree),
1672 FUNTYPE is the data type of the function (as a tree),
1673 or for a library call it is an identifier node for the subroutine name.
1674 SIZE is the number of bytes of arguments passed on the stack.
1676 On the 80386, the RTD insn may be used to pop them if the number
1677 of args is fixed, but if the number is variable then the caller
1678 must pop them all. RTD can't be used for library calls now
1679 because the library is compiled with the Unix compiler.
1680 Use of RTD is a selectable option, since it is incompatible with
1681 standard Unix calling sequences. If the option is not selected,
1682 the caller must always pop the args.
1684 The attribute stdcall is equivalent to RTD on a per module basis. */
1687 ix86_return_pops_args (fundecl, funtype, size)
1692 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1694 /* Cdecl functions override -mrtd, and never pop the stack. */
1695 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1697 /* Stdcall and fastcall functions will pop the stack if not variable args. */
1698 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1699 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1703 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1704 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1705 == void_type_node)))
1709 /* Lose any fake structure return argument if it is passed on the stack. */
1710 if (aggregate_value_p (TREE_TYPE (funtype))
1713 int nregs = ix86_fntype_regparm (funtype);
1716 return GET_MODE_SIZE (Pmode);
1722 /* Argument support functions. */
1724 /* Return true when register may be used to pass function parameters. */
1726 ix86_function_arg_regno_p (regno)
1731 return (regno < REGPARM_MAX
1732 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1733 if (SSE_REGNO_P (regno) && TARGET_SSE)
1735 /* RAX is used as hidden argument to va_arg functions. */
1738 for (i = 0; i < REGPARM_MAX; i++)
1739 if (regno == x86_64_int_parameter_registers[i])
1744 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1745 for a call to a function whose data type is FNTYPE.
1746 For a library call, FNTYPE is 0. */
1749 init_cumulative_args (cum, fntype, libname)
1750 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
1751 tree fntype; /* tree ptr for function decl */
1752 rtx libname; /* SYMBOL_REF of library name or 0 */
1754 static CUMULATIVE_ARGS zero_cum;
1755 tree param, next_param;
1757 if (TARGET_DEBUG_ARG)
1759 fprintf (stderr, "\ninit_cumulative_args (");
1761 fprintf (stderr, "fntype code = %s, ret code = %s",
1762 tree_code_name[(int) TREE_CODE (fntype)],
1763 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1765 fprintf (stderr, "no fntype");
1768 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1773 /* Set up the number of registers to use for passing arguments. */
1774 cum->nregs = ix86_regparm;
1775 cum->sse_nregs = SSE_REGPARM_MAX;
1776 if (fntype && !TARGET_64BIT)
1778 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
1781 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1783 cum->maybe_vaarg = false;
1785 /* Use ecx and edx registers if function has fastcall attribute */
1786 if (fntype && !TARGET_64BIT)
1788 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1796 /* Determine if this function has variable arguments. This is
1797 indicated by the last argument being 'void_type_mode' if there
1798 are no variable arguments. If there are variable arguments, then
1799 we won't pass anything in registers */
1803 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1804 param != 0; param = next_param)
1806 next_param = TREE_CHAIN (param);
1807 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1814 cum->maybe_vaarg = true;
1818 if ((!fntype && !libname)
1819 || (fntype && !TYPE_ARG_TYPES (fntype)))
1820 cum->maybe_vaarg = 1;
1822 if (TARGET_DEBUG_ARG)
1823 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1828 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
1829 of this code is to classify each 8bytes of incoming argument by the register
1830 class and assign registers accordingly. */
1832 /* Return the union class of CLASS1 and CLASS2.
1833 See the x86-64 PS ABI for details. */
1835 static enum x86_64_reg_class
1836 merge_classes (class1, class2)
1837 enum x86_64_reg_class class1, class2;
1839 /* Rule #1: If both classes are equal, this is the resulting class. */
1840 if (class1 == class2)
1843 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1845 if (class1 == X86_64_NO_CLASS)
1847 if (class2 == X86_64_NO_CLASS)
1850 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1851 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1852 return X86_64_MEMORY_CLASS;
1854 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1855 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1856 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1857 return X86_64_INTEGERSI_CLASS;
1858 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1859 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1860 return X86_64_INTEGER_CLASS;
1862 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1863 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1864 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1865 return X86_64_MEMORY_CLASS;
1867 /* Rule #6: Otherwise class SSE is used. */
1868 return X86_64_SSE_CLASS;
1871 /* Classify the argument of type TYPE and mode MODE.
1872 CLASSES will be filled by the register class used to pass each word
1873 of the operand. The number of words is returned. In case the parameter
1874 should be passed in memory, 0 is returned. As a special case for zero
1875 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1877 BIT_OFFSET is used internally for handling records and specifies offset
1878 of the offset in bits modulo 256 to avoid overflow cases.
1880 See the x86-64 PS ABI for details.
1884 classify_argument (mode, type, classes, bit_offset)
1885 enum machine_mode mode;
1887 enum x86_64_reg_class classes[MAX_CLASSES];
1891 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1892 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1894 /* Variable sized entities are always passed/returned in memory. */
1898 if (type && AGGREGATE_TYPE_P (type))
1902 enum x86_64_reg_class subclasses[MAX_CLASSES];
1904 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1908 for (i = 0; i < words; i++)
1909 classes[i] = X86_64_NO_CLASS;
1911 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
1912 signalize memory class, so handle it as special case. */
1915 classes[0] = X86_64_NO_CLASS;
1919 /* Classify each field of record and merge classes. */
1920 if (TREE_CODE (type) == RECORD_TYPE)
1922 /* For classes first merge in the field of the subclasses. */
1923 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1925 tree bases = TYPE_BINFO_BASETYPES (type);
1926 int n_bases = TREE_VEC_LENGTH (bases);
1929 for (i = 0; i < n_bases; ++i)
1931 tree binfo = TREE_VEC_ELT (bases, i);
1933 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1934 tree type = BINFO_TYPE (binfo);
1936 num = classify_argument (TYPE_MODE (type),
1938 (offset + bit_offset) % 256);
1941 for (i = 0; i < num; i++)
1943 int pos = (offset + (bit_offset % 64)) / 8 / 8;
1945 merge_classes (subclasses[i], classes[i + pos]);
1949 /* And now merge the fields of structure. */
1950 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1952 if (TREE_CODE (field) == FIELD_DECL)
1956 /* Bitfields are always classified as integer. Handle them
1957 early, since later code would consider them to be
1958 misaligned integers. */
1959 if (DECL_BIT_FIELD (field))
1961 for (i = int_bit_position (field) / 8 / 8;
1962 i < (int_bit_position (field)
1963 + tree_low_cst (DECL_SIZE (field), 0)
1966 merge_classes (X86_64_INTEGER_CLASS,
1971 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1972 TREE_TYPE (field), subclasses,
1973 (int_bit_position (field)
1974 + bit_offset) % 256);
1977 for (i = 0; i < num; i++)
1980 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
1982 merge_classes (subclasses[i], classes[i + pos]);
1988 /* Arrays are handled as small records. */
1989 else if (TREE_CODE (type) == ARRAY_TYPE)
1992 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1993 TREE_TYPE (type), subclasses, bit_offset);
1997 /* The partial classes are now full classes. */
1998 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1999 subclasses[0] = X86_64_SSE_CLASS;
2000 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2001 subclasses[0] = X86_64_INTEGER_CLASS;
2003 for (i = 0; i < words; i++)
2004 classes[i] = subclasses[i % num];
2006 /* Unions are similar to RECORD_TYPE but offset is always 0. */
2007 else if (TREE_CODE (type) == UNION_TYPE
2008 || TREE_CODE (type) == QUAL_UNION_TYPE)
2010 /* For classes first merge in the field of the subclasses. */
2011 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2013 tree bases = TYPE_BINFO_BASETYPES (type);
2014 int n_bases = TREE_VEC_LENGTH (bases);
2017 for (i = 0; i < n_bases; ++i)
2019 tree binfo = TREE_VEC_ELT (bases, i);
2021 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2022 tree type = BINFO_TYPE (binfo);
2024 num = classify_argument (TYPE_MODE (type),
2026 (offset + (bit_offset % 64)) % 256);
2029 for (i = 0; i < num; i++)
2031 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2033 merge_classes (subclasses[i], classes[i + pos]);
2037 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2039 if (TREE_CODE (field) == FIELD_DECL)
2042 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2043 TREE_TYPE (field), subclasses,
2047 for (i = 0; i < num; i++)
2048 classes[i] = merge_classes (subclasses[i], classes[i]);
2055 /* Final merger cleanup. */
2056 for (i = 0; i < words; i++)
2058 /* If one class is MEMORY, everything should be passed in
2060 if (classes[i] == X86_64_MEMORY_CLASS)
2063 /* The X86_64_SSEUP_CLASS should be always preceded by
2064 X86_64_SSE_CLASS. */
2065 if (classes[i] == X86_64_SSEUP_CLASS
2066 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2067 classes[i] = X86_64_SSE_CLASS;
2069 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2070 if (classes[i] == X86_64_X87UP_CLASS
2071 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2072 classes[i] = X86_64_SSE_CLASS;
2077 /* Compute alignment needed. We align all types to natural boundaries with
2078 exception of XFmode that is aligned to 64bits. */
2079 if (mode != VOIDmode && mode != BLKmode)
2081 int mode_alignment = GET_MODE_BITSIZE (mode);
2084 mode_alignment = 128;
2085 else if (mode == XCmode)
2086 mode_alignment = 256;
2087 /* Misaligned fields are always returned in memory. */
2088 if (bit_offset % mode_alignment)
2092 /* Classification of atomic types. */
2102 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2103 classes[0] = X86_64_INTEGERSI_CLASS;
2105 classes[0] = X86_64_INTEGER_CLASS;
2109 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2112 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2113 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2116 if (!(bit_offset % 64))
2117 classes[0] = X86_64_SSESF_CLASS;
2119 classes[0] = X86_64_SSE_CLASS;
2122 classes[0] = X86_64_SSEDF_CLASS;
2125 classes[0] = X86_64_X87_CLASS;
2126 classes[1] = X86_64_X87UP_CLASS;
2129 classes[0] = X86_64_X87_CLASS;
2130 classes[1] = X86_64_X87UP_CLASS;
2131 classes[2] = X86_64_X87_CLASS;
2132 classes[3] = X86_64_X87UP_CLASS;
2135 classes[0] = X86_64_SSEDF_CLASS;
2136 classes[1] = X86_64_SSEDF_CLASS;
2139 classes[0] = X86_64_SSE_CLASS;
2147 classes[0] = X86_64_SSE_CLASS;
2148 classes[1] = X86_64_SSEUP_CLASS;
2163 /* Examine the argument and return set number of register required in each
2164 class. Return 0 iff parameter should be passed in memory. */
2166 examine_argument (mode, type, in_return, int_nregs, sse_nregs)
2167 enum machine_mode mode;
2169 int *int_nregs, *sse_nregs;
2172 enum x86_64_reg_class class[MAX_CLASSES];
2173 int n = classify_argument (mode, type, class, 0);
2179 for (n--; n >= 0; n--)
2182 case X86_64_INTEGER_CLASS:
2183 case X86_64_INTEGERSI_CLASS:
2186 case X86_64_SSE_CLASS:
2187 case X86_64_SSESF_CLASS:
2188 case X86_64_SSEDF_CLASS:
2191 case X86_64_NO_CLASS:
2192 case X86_64_SSEUP_CLASS:
2194 case X86_64_X87_CLASS:
2195 case X86_64_X87UP_CLASS:
2199 case X86_64_MEMORY_CLASS:
2204 /* Construct container for the argument used by GCC interface. See
2205 FUNCTION_ARG for the detailed description. */
2207 construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
2208 enum machine_mode mode;
2211 int nintregs, nsseregs;
2215 enum machine_mode tmpmode;
2217 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2218 enum x86_64_reg_class class[MAX_CLASSES];
2222 int needed_sseregs, needed_intregs;
2223 rtx exp[MAX_CLASSES];
2226 n = classify_argument (mode, type, class, 0);
2227 if (TARGET_DEBUG_ARG)
2230 fprintf (stderr, "Memory class\n");
2233 fprintf (stderr, "Classes:");
2234 for (i = 0; i < n; i++)
2236 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2238 fprintf (stderr, "\n");
2243 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2245 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2248 /* First construct simple cases. Avoid SCmode, since we want to use
2249 single register to pass this type. */
2250 if (n == 1 && mode != SCmode)
2253 case X86_64_INTEGER_CLASS:
2254 case X86_64_INTEGERSI_CLASS:
2255 return gen_rtx_REG (mode, intreg[0]);
2256 case X86_64_SSE_CLASS:
2257 case X86_64_SSESF_CLASS:
2258 case X86_64_SSEDF_CLASS:
2259 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2260 case X86_64_X87_CLASS:
2261 return gen_rtx_REG (mode, FIRST_STACK_REG);
2262 case X86_64_NO_CLASS:
2263 /* Zero sized array, struct or class. */
2268 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
2269 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2271 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2272 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
2273 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2274 && class[1] == X86_64_INTEGER_CLASS
2275 && (mode == CDImode || mode == TImode)
2276 && intreg[0] + 1 == intreg[1])
2277 return gen_rtx_REG (mode, intreg[0]);
2279 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2280 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2281 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
2283 /* Otherwise figure out the entries of the PARALLEL. */
2284 for (i = 0; i < n; i++)
2288 case X86_64_NO_CLASS:
2290 case X86_64_INTEGER_CLASS:
2291 case X86_64_INTEGERSI_CLASS:
2292 /* Merge TImodes on aligned occasions here too. */
2293 if (i * 8 + 8 > bytes)
2294 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2295 else if (class[i] == X86_64_INTEGERSI_CLASS)
2299 /* We've requested 24 bytes we don't have mode for. Use DImode. */
2300 if (tmpmode == BLKmode)
2302 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2303 gen_rtx_REG (tmpmode, *intreg),
2307 case X86_64_SSESF_CLASS:
2308 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2309 gen_rtx_REG (SFmode,
2310 SSE_REGNO (sse_regno)),
2314 case X86_64_SSEDF_CLASS:
2315 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2316 gen_rtx_REG (DFmode,
2317 SSE_REGNO (sse_regno)),
2321 case X86_64_SSE_CLASS:
2322 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2326 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2327 gen_rtx_REG (tmpmode,
2328 SSE_REGNO (sse_regno)),
2330 if (tmpmode == TImode)
2338 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2339 for (i = 0; i < nexps; i++)
2340 XVECEXP (ret, 0, i) = exp [i];
2344 /* Update the data in CUM to advance over an argument
2345 of mode MODE and data type TYPE.
2346 (TYPE is null for libcalls where that information may not be available.) */
2349 function_arg_advance (cum, mode, type, named)
2350 CUMULATIVE_ARGS *cum; /* current arg information */
2351 enum machine_mode mode; /* current arg mode */
2352 tree type; /* type of the argument or 0 if lib support */
2353 int named; /* whether or not the argument was named */
2356 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2357 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2359 if (TARGET_DEBUG_ARG)
2361 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
2362 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2365 int int_nregs, sse_nregs;
2366 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2367 cum->words += words;
2368 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2370 cum->nregs -= int_nregs;
2371 cum->sse_nregs -= sse_nregs;
2372 cum->regno += int_nregs;
2373 cum->sse_regno += sse_nregs;
2376 cum->words += words;
2380 if (TARGET_SSE && mode == TImode)
2382 cum->sse_words += words;
2383 cum->sse_nregs -= 1;
2384 cum->sse_regno += 1;
2385 if (cum->sse_nregs <= 0)
2393 cum->words += words;
2394 cum->nregs -= words;
2395 cum->regno += words;
2397 if (cum->nregs <= 0)
2407 /* Define where to put the arguments to a function.
2408 Value is zero to push the argument on the stack,
2409 or a hard register in which to store the argument.
2411 MODE is the argument's machine mode.
2412 TYPE is the data type of the argument (as a tree).
2413 This is null for libcalls where that information may
2415 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2416 the preceding args and about the function being called.
2417 NAMED is nonzero if this argument is a named parameter
2418 (otherwise it is an extra parameter matching an ellipsis). */
2421 function_arg (cum, mode, type, named)
2422 CUMULATIVE_ARGS *cum; /* current arg information */
2423 enum machine_mode mode; /* current arg mode */
2424 tree type; /* type of the argument or 0 if lib support */
2425 int named; /* != 0 for normal args, == 0 for ... args */
2429 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2430 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2432 /* Handle a hidden AL argument containing number of registers for varargs
2433 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
2435 if (mode == VOIDmode)
2438 return GEN_INT (cum->maybe_vaarg
2439 ? (cum->sse_nregs < 0
2447 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2448 &x86_64_int_parameter_registers [cum->regno],
2453 /* For now, pass fp/complex values on the stack. */
2462 if (words <= cum->nregs)
2464 int regno = cum->regno;
2466 /* Fastcall allocates the first two DWORD (SImode) or
2467 smaller arguments to ECX and EDX. */
2470 if (mode == BLKmode || mode == DImode)
2473 /* ECX not EAX is the first allocated register. */
2477 ret = gen_rtx_REG (mode, regno);
2482 ret = gen_rtx_REG (mode, cum->sse_regno);
2486 if (TARGET_DEBUG_ARG)
2489 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2490 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2493 print_simple_rtl (stderr, ret);
2495 fprintf (stderr, ", stack");
2497 fprintf (stderr, " )\n");
2503 /* A C expression that indicates when an argument must be passed by
2504 reference. If nonzero for an argument, a copy of that argument is
2505 made in memory and a pointer to the argument is passed instead of
2506 the argument itself. The pointer is passed in whatever way is
2507 appropriate for passing a pointer to that type. */
2510 function_arg_pass_by_reference (cum, mode, type, named)
2511 CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
2512 enum machine_mode mode ATTRIBUTE_UNUSED;
2514 int named ATTRIBUTE_UNUSED;
2519 if (type && int_size_in_bytes (type) == -1)
2521 if (TARGET_DEBUG_ARG)
2522 fprintf (stderr, "function_arg_pass_by_reference\n");
2529 /* Gives the alignment boundary, in bits, of an argument with the specified mode
2533 ix86_function_arg_boundary (mode, type)
2534 enum machine_mode mode;
2539 return PARM_BOUNDARY;
2541 align = TYPE_ALIGN (type);
2543 align = GET_MODE_ALIGNMENT (mode);
2544 if (align < PARM_BOUNDARY)
2545 align = PARM_BOUNDARY;
2551 /* Return true if N is a possible register number of function value. */
2553 ix86_function_value_regno_p (regno)
2558 return ((regno) == 0
2559 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2560 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2562 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2563 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2564 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2567 /* Define how to find the value returned by a function.
2568 VALTYPE is the data type of the value (as a tree).
2569 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2570 otherwise, FUNC is 0. */
2572 ix86_function_value (valtype)
2577 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2578 REGPARM_MAX, SSE_REGPARM_MAX,
2579 x86_64_int_return_registers, 0);
2580 /* For zero sized structures, construct_container return NULL, but we need
2581 to keep rest of compiler happy by returning meaningful value. */
2583 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2587 return gen_rtx_REG (TYPE_MODE (valtype),
2588 ix86_value_regno (TYPE_MODE (valtype)));
2591 /* Return false iff type is returned in memory. */
2593 ix86_return_in_memory (type)
2596 int needed_intregs, needed_sseregs;
2599 return !examine_argument (TYPE_MODE (type), type, 1,
2600 &needed_intregs, &needed_sseregs);
2604 if (TYPE_MODE (type) == BLKmode
2605 || (VECTOR_MODE_P (TYPE_MODE (type))
2606 && int_size_in_bytes (type) == 8)
2607 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2608 && TYPE_MODE (type) != TFmode
2609 && !VECTOR_MODE_P (TYPE_MODE (type))))
2615 /* Define how to find the value returned by a library function
2616 assuming the value has mode MODE. */
2618 ix86_libcall_value (mode)
2619 enum machine_mode mode;
2629 return gen_rtx_REG (mode, FIRST_SSE_REG);
2632 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2634 return gen_rtx_REG (mode, 0);
2638 return gen_rtx_REG (mode, ix86_value_regno (mode));
2641 /* Given a mode, return the register to use for a return value. */
2644 ix86_value_regno (mode)
2645 enum machine_mode mode;
2647 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2648 return FIRST_FLOAT_REG;
2649 if (mode == TImode || VECTOR_MODE_P (mode))
2650 return FIRST_SSE_REG;
2654 /* Create the va_list data type. */
2657 ix86_build_va_list ()
2659 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2661 /* For i386 we use plain pointer to argument area. */
2663 return build_pointer_type (char_type_node);
2665 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2666 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2668 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2669 unsigned_type_node);
2670 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2671 unsigned_type_node);
2672 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2674 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2677 DECL_FIELD_CONTEXT (f_gpr) = record;
2678 DECL_FIELD_CONTEXT (f_fpr) = record;
2679 DECL_FIELD_CONTEXT (f_ovf) = record;
2680 DECL_FIELD_CONTEXT (f_sav) = record;
2682 TREE_CHAIN (record) = type_decl;
2683 TYPE_NAME (record) = type_decl;
2684 TYPE_FIELDS (record) = f_gpr;
2685 TREE_CHAIN (f_gpr) = f_fpr;
2686 TREE_CHAIN (f_fpr) = f_ovf;
2687 TREE_CHAIN (f_ovf) = f_sav;
2689 layout_type (record);
2691 /* The correct type is an array type of one element. */
2692 return build_array_type (record, build_index_type (size_zero_node));
2695 /* Perform any needed actions needed for a function that is receiving a
2696 variable number of arguments.
2700 MODE and TYPE are the mode and type of the current parameter.
2702 PRETEND_SIZE is a variable that should be set to the amount of stack
2703 that must be pushed by the prolog to pretend that our caller pushed
2706 Normally, this macro will push all remaining incoming registers on the
2707 stack and set PRETEND_SIZE to the length of the registers pushed. */
2710 ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2711 CUMULATIVE_ARGS *cum;
2712 enum machine_mode mode;
2714 int *pretend_size ATTRIBUTE_UNUSED;
2718 CUMULATIVE_ARGS next_cum;
2719 rtx save_area = NULL_RTX, mem;
2732 /* Indicate to allocate space on the stack for varargs save area. */
2733 ix86_save_varrargs_registers = 1;
2735 fntype = TREE_TYPE (current_function_decl);
2736 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2737 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2738 != void_type_node));
2740 /* For varargs, we do not want to skip the dummy va_dcl argument.
2741 For stdargs, we do want to skip the last named argument. */
2744 function_arg_advance (&next_cum, mode, type, 1);
2747 save_area = frame_pointer_rtx;
2749 set = get_varargs_alias_set ();
2751 for (i = next_cum.regno; i < ix86_regparm; i++)
2753 mem = gen_rtx_MEM (Pmode,
2754 plus_constant (save_area, i * UNITS_PER_WORD));
2755 set_mem_alias_set (mem, set);
2756 emit_move_insn (mem, gen_rtx_REG (Pmode,
2757 x86_64_int_parameter_registers[i]));
2760 if (next_cum.sse_nregs)
2762 /* Now emit code to save SSE registers. The AX parameter contains number
2763 of SSE parameter registers used to call this function. We use
2764 sse_prologue_save insn template that produces computed jump across
2765 SSE saves. We need some preparation work to get this working. */
2767 label = gen_label_rtx ();
2768 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2770 /* Compute address to jump to :
2771 label - 5*eax + nnamed_sse_arguments*5 */
2772 tmp_reg = gen_reg_rtx (Pmode);
2773 nsse_reg = gen_reg_rtx (Pmode);
2774 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2775 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2776 gen_rtx_MULT (Pmode, nsse_reg,
2778 if (next_cum.sse_regno)
2781 gen_rtx_CONST (DImode,
2782 gen_rtx_PLUS (DImode,
2784 GEN_INT (next_cum.sse_regno * 4))));
2786 emit_move_insn (nsse_reg, label_ref);
2787 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2789 /* Compute address of memory block we save into. We always use pointer
2790 pointing 127 bytes after first byte to store - this is needed to keep
2791 instruction size limited by 4 bytes. */
2792 tmp_reg = gen_reg_rtx (Pmode);
2793 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2794 plus_constant (save_area,
2795 8 * REGPARM_MAX + 127)));
2796 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
2797 set_mem_alias_set (mem, set);
2798 set_mem_align (mem, BITS_PER_WORD);
2800 /* And finally do the dirty job! */
2801 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2802 GEN_INT (next_cum.sse_regno), label));
2807 /* Implement va_start. */
2810 ix86_va_start (valist, nextarg)
2814 HOST_WIDE_INT words, n_gpr, n_fpr;
2815 tree f_gpr, f_fpr, f_ovf, f_sav;
2816 tree gpr, fpr, ovf, sav, t;
2818 /* Only 64bit target needs something special. */
2821 std_expand_builtin_va_start (valist, nextarg);
2825 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2826 f_fpr = TREE_CHAIN (f_gpr);
2827 f_ovf = TREE_CHAIN (f_fpr);
2828 f_sav = TREE_CHAIN (f_ovf);
2830 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2831 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2832 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2833 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2834 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2836 /* Count number of gp and fp argument registers used. */
2837 words = current_function_args_info.words;
2838 n_gpr = current_function_args_info.regno;
2839 n_fpr = current_function_args_info.sse_regno;
2841 if (TARGET_DEBUG_ARG)
2842 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
2843 (int) words, (int) n_gpr, (int) n_fpr);
2845 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2846 build_int_2 (n_gpr * 8, 0));
2847 TREE_SIDE_EFFECTS (t) = 1;
2848 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2850 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2851 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2852 TREE_SIDE_EFFECTS (t) = 1;
2853 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2855 /* Find the overflow area. */
2856 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2858 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2859 build_int_2 (words * UNITS_PER_WORD, 0));
2860 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2861 TREE_SIDE_EFFECTS (t) = 1;
2862 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2864 /* Find the register save area.
2865 Prologue of the function save it right above stack frame. */
2866 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2867 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2868 TREE_SIDE_EFFECTS (t) = 1;
2869 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2872 /* Implement va_arg. */
2874 ix86_va_arg (valist, type)
2877 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
2878 tree f_gpr, f_fpr, f_ovf, f_sav;
2879 tree gpr, fpr, ovf, sav, t;
2881 rtx lab_false, lab_over = NULL_RTX;
2886 /* Only 64bit target needs something special. */
2889 return std_expand_builtin_va_arg (valist, type);
2892 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2893 f_fpr = TREE_CHAIN (f_gpr);
2894 f_ovf = TREE_CHAIN (f_fpr);
2895 f_sav = TREE_CHAIN (f_ovf);
2897 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2898 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2899 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2900 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2901 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2903 size = int_size_in_bytes (type);
2906 /* Passed by reference. */
2908 type = build_pointer_type (type);
2909 size = int_size_in_bytes (type);
2911 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2913 container = construct_container (TYPE_MODE (type), type, 0,
2914 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2916 * Pull the value out of the saved registers ...
2919 addr_rtx = gen_reg_rtx (Pmode);
2923 rtx int_addr_rtx, sse_addr_rtx;
2924 int needed_intregs, needed_sseregs;
2927 lab_over = gen_label_rtx ();
2928 lab_false = gen_label_rtx ();
2930 examine_argument (TYPE_MODE (type), type, 0,
2931 &needed_intregs, &needed_sseregs);
2934 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2935 || TYPE_ALIGN (type) > 128);
2937 /* In case we are passing structure, verify that it is consecutive block
2938 on the register save area. If not we need to do moves. */
2939 if (!need_temp && !REG_P (container))
2941 /* Verify that all registers are strictly consecutive */
2942 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2946 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2948 rtx slot = XVECEXP (container, 0, i);
2949 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
2950 || INTVAL (XEXP (slot, 1)) != i * 16)
2958 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2960 rtx slot = XVECEXP (container, 0, i);
2961 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
2962 || INTVAL (XEXP (slot, 1)) != i * 8)
2969 int_addr_rtx = addr_rtx;
2970 sse_addr_rtx = addr_rtx;
2974 int_addr_rtx = gen_reg_rtx (Pmode);
2975 sse_addr_rtx = gen_reg_rtx (Pmode);
2977 /* First ensure that we fit completely in registers. */
2980 emit_cmp_and_jump_insns (expand_expr
2981 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2982 GEN_INT ((REGPARM_MAX - needed_intregs +
2983 1) * 8), GE, const1_rtx, SImode,
2988 emit_cmp_and_jump_insns (expand_expr
2989 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2990 GEN_INT ((SSE_REGPARM_MAX -
2991 needed_sseregs + 1) * 16 +
2992 REGPARM_MAX * 8), GE, const1_rtx,
2993 SImode, 1, lab_false);
2996 /* Compute index to start of area used for integer regs. */
2999 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
3000 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
3001 if (r != int_addr_rtx)
3002 emit_move_insn (int_addr_rtx, r);
3006 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
3007 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
3008 if (r != sse_addr_rtx)
3009 emit_move_insn (sse_addr_rtx, r);
3016 /* Never use the memory itself, as it has the alias set. */
3017 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
3018 mem = gen_rtx_MEM (BLKmode, addr_rtx);
3019 set_mem_alias_set (mem, get_varargs_alias_set ());
3020 set_mem_align (mem, BITS_PER_UNIT);
3022 for (i = 0; i < XVECLEN (container, 0); i++)
3024 rtx slot = XVECEXP (container, 0, i);
3025 rtx reg = XEXP (slot, 0);
3026 enum machine_mode mode = GET_MODE (reg);
3032 if (SSE_REGNO_P (REGNO (reg)))
3034 src_addr = sse_addr_rtx;
3035 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3039 src_addr = int_addr_rtx;
3040 src_offset = REGNO (reg) * 8;
3042 src_mem = gen_rtx_MEM (mode, src_addr);
3043 set_mem_alias_set (src_mem, get_varargs_alias_set ());
3044 src_mem = adjust_address (src_mem, mode, src_offset);
3045 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
3046 emit_move_insn (dest_mem, src_mem);
3053 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3054 build_int_2 (needed_intregs * 8, 0));
3055 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3056 TREE_SIDE_EFFECTS (t) = 1;
3057 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3062 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3063 build_int_2 (needed_sseregs * 16, 0));
3064 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3065 TREE_SIDE_EFFECTS (t) = 1;
3066 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3069 emit_jump_insn (gen_jump (lab_over));
3071 emit_label (lab_false);
3074 /* ... otherwise out of the overflow area. */
3076 /* Care for on-stack alignment if needed. */
3077 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3081 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3082 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3083 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3087 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3089 emit_move_insn (addr_rtx, r);
3092 build (PLUS_EXPR, TREE_TYPE (t), t,
3093 build_int_2 (rsize * UNITS_PER_WORD, 0));
3094 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3095 TREE_SIDE_EFFECTS (t) = 1;
3096 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3099 emit_label (lab_over);
3103 r = gen_rtx_MEM (Pmode, addr_rtx);
3104 set_mem_alias_set (r, get_varargs_alias_set ());
3105 emit_move_insn (addr_rtx, r);
3111 /* Return nonzero if OP is either a i387 or SSE fp register. */
3113 any_fp_register_operand (op, mode)
3115 enum machine_mode mode ATTRIBUTE_UNUSED;
3117 return ANY_FP_REG_P (op);
3120 /* Return nonzero if OP is an i387 fp register. */
3122 fp_register_operand (op, mode)
3124 enum machine_mode mode ATTRIBUTE_UNUSED;
3126 return FP_REG_P (op);
3129 /* Return nonzero if OP is a non-fp register_operand. */
3131 register_and_not_any_fp_reg_operand (op, mode)
3133 enum machine_mode mode;
3135 return register_operand (op, mode) && !ANY_FP_REG_P (op);
3138 /* Return nonzero if OP is a register operand other than an
3139 i387 fp register. */
3141 register_and_not_fp_reg_operand (op, mode)
3143 enum machine_mode mode;
3145 return register_operand (op, mode) && !FP_REG_P (op);
3148 /* Return nonzero if OP is general operand representable on x86_64. */
3151 x86_64_general_operand (op, mode)
3153 enum machine_mode mode;
3156 return general_operand (op, mode);
3157 if (nonimmediate_operand (op, mode))
3159 return x86_64_sign_extended_value (op);
3162 /* Return nonzero if OP is general operand representable on x86_64
3163 as either sign extended or zero extended constant. */
3166 x86_64_szext_general_operand (op, mode)
3168 enum machine_mode mode;
3171 return general_operand (op, mode);
3172 if (nonimmediate_operand (op, mode))
3174 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3177 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3180 x86_64_nonmemory_operand (op, mode)
3182 enum machine_mode mode;
3185 return nonmemory_operand (op, mode);
3186 if (register_operand (op, mode))
3188 return x86_64_sign_extended_value (op);
3191 /* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
3194 x86_64_movabs_operand (op, mode)
3196 enum machine_mode mode;
3198 if (!TARGET_64BIT || !flag_pic)
3199 return nonmemory_operand (op, mode);
3200 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3202 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3207 /* Return nonzero if OP is nonmemory operand representable on x86_64. */
3210 x86_64_szext_nonmemory_operand (op, mode)
3212 enum machine_mode mode;
3215 return nonmemory_operand (op, mode);
3216 if (register_operand (op, mode))
3218 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3221 /* Return nonzero if OP is immediate operand representable on x86_64. */
3224 x86_64_immediate_operand (op, mode)
3226 enum machine_mode mode;
3229 return immediate_operand (op, mode);
3230 return x86_64_sign_extended_value (op);
3233 /* Return nonzero if OP is immediate operand representable on x86_64. */
3236 x86_64_zext_immediate_operand (op, mode)
3238 enum machine_mode mode ATTRIBUTE_UNUSED;
3240 return x86_64_zero_extended_value (op);
3243 /* Return nonzero if OP is (const_int 1), else return zero. */
3246 const_int_1_operand (op, mode)
3248 enum machine_mode mode ATTRIBUTE_UNUSED;
3250 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
3253 /* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3254 for shift & compare patterns, as shifting by 0 does not change flags),
3255 else return zero. */
3258 const_int_1_31_operand (op, mode)
3260 enum machine_mode mode ATTRIBUTE_UNUSED;
3262 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3265 /* Returns 1 if OP is either a symbol reference or a sum of a symbol
3266 reference and a constant. */
3269 symbolic_operand (op, mode)
3271 enum machine_mode mode ATTRIBUTE_UNUSED;
3273 switch (GET_CODE (op))
3281 if (GET_CODE (op) == SYMBOL_REF
3282 || GET_CODE (op) == LABEL_REF
3283 || (GET_CODE (op) == UNSPEC
3284 && (XINT (op, 1) == UNSPEC_GOT
3285 || XINT (op, 1) == UNSPEC_GOTOFF
3286 || XINT (op, 1) == UNSPEC_GOTPCREL)))
3288 if (GET_CODE (op) != PLUS
3289 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3293 if (GET_CODE (op) == SYMBOL_REF
3294 || GET_CODE (op) == LABEL_REF)
3296 /* Only @GOTOFF gets offsets. */
3297 if (GET_CODE (op) != UNSPEC
3298 || XINT (op, 1) != UNSPEC_GOTOFF)
3301 op = XVECEXP (op, 0, 0);
3302 if (GET_CODE (op) == SYMBOL_REF
3303 || GET_CODE (op) == LABEL_REF)
3312 /* Return true if the operand contains a @GOT or @GOTOFF reference. */
3315 pic_symbolic_operand (op, mode)
3317 enum machine_mode mode ATTRIBUTE_UNUSED;
3319 if (GET_CODE (op) != CONST)
3324 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
3329 if (GET_CODE (op) == UNSPEC)
3331 if (GET_CODE (op) != PLUS
3332 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3335 if (GET_CODE (op) == UNSPEC)
3341 /* Return true if OP is a symbolic operand that resolves locally. */
3344 local_symbolic_operand (op, mode)
3346 enum machine_mode mode ATTRIBUTE_UNUSED;
3348 if (GET_CODE (op) == CONST
3349 && GET_CODE (XEXP (op, 0)) == PLUS
3350 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3351 op = XEXP (XEXP (op, 0), 0);
3353 if (GET_CODE (op) == LABEL_REF)
3356 if (GET_CODE (op) != SYMBOL_REF)
3359 /* These we've been told are local by varasm and encode_section_info
3361 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
3364 /* There is, however, a not insubstantial body of code in the rest of
3365 the compiler that assumes it can just stick the results of
3366 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3367 /* ??? This is a hack. Should update the body of the compiler to
3368 always create a DECL an invoke targetm.encode_section_info. */
3369 if (strncmp (XSTR (op, 0), internal_label_prefix,
3370 internal_label_prefix_len) == 0)
3376 /* Test for various thread-local symbols. See ix86_encode_section_info. */
3379 tls_symbolic_operand (op, mode)
3381 enum machine_mode mode ATTRIBUTE_UNUSED;
3383 const char *symbol_str;
3385 if (GET_CODE (op) != SYMBOL_REF)
3387 symbol_str = XSTR (op, 0);
3389 if (symbol_str[0] != '%')
3391 return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
3395 tls_symbolic_operand_1 (op, kind)
3397 enum tls_model kind;
3399 const char *symbol_str;
3401 if (GET_CODE (op) != SYMBOL_REF)
3403 symbol_str = XSTR (op, 0);
3405 return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
3409 global_dynamic_symbolic_operand (op, mode)
3411 enum machine_mode mode ATTRIBUTE_UNUSED;
3413 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3417 local_dynamic_symbolic_operand (op, mode)
3419 enum machine_mode mode ATTRIBUTE_UNUSED;
3421 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3425 initial_exec_symbolic_operand (op, mode)
3427 enum machine_mode mode ATTRIBUTE_UNUSED;
3429 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3433 local_exec_symbolic_operand (op, mode)
3435 enum machine_mode mode ATTRIBUTE_UNUSED;
3437 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3440 /* Test for a valid operand for a call instruction. Don't allow the
3441 arg pointer register or virtual regs since they may decay into
3442 reg + const, which the patterns can't handle. */
3445 call_insn_operand (op, mode)
3447 enum machine_mode mode ATTRIBUTE_UNUSED;
3449 /* Disallow indirect through a virtual register. This leads to
3450 compiler aborts when trying to eliminate them. */
3451 if (GET_CODE (op) == REG
3452 && (op == arg_pointer_rtx
3453 || op == frame_pointer_rtx
3454 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3455 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3458 /* Disallow `call 1234'. Due to varying assembler lameness this
3459 gets either rejected or translated to `call .+1234'. */
3460 if (GET_CODE (op) == CONST_INT)
3463 /* Explicitly allow SYMBOL_REF even if pic. */
3464 if (GET_CODE (op) == SYMBOL_REF)
3467 /* Otherwise we can allow any general_operand in the address. */
3468 return general_operand (op, Pmode);
3471 /* Test for a valid operand for a call instruction. Don't allow the
3472 arg pointer register or virtual regs since they may decay into
3473 reg + const, which the patterns can't handle. */
3476 sibcall_insn_operand (op, mode)
3478 enum machine_mode mode ATTRIBUTE_UNUSED;
3480 /* Disallow indirect through a virtual register. This leads to
3481 compiler aborts when trying to eliminate them. */
3482 if (GET_CODE (op) == REG
3483 && (op == arg_pointer_rtx
3484 || op == frame_pointer_rtx
3485 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3486 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3489 /* Explicitly allow SYMBOL_REF even if pic. */
3490 if (GET_CODE (op) == SYMBOL_REF)
3493 /* Otherwise we can only allow register operands. */
3494 return register_operand (op, Pmode);
3498 constant_call_address_operand (op, mode)
3500 enum machine_mode mode ATTRIBUTE_UNUSED;
3502 if (GET_CODE (op) == CONST
3503 && GET_CODE (XEXP (op, 0)) == PLUS
3504 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3505 op = XEXP (XEXP (op, 0), 0);
3506 return GET_CODE (op) == SYMBOL_REF;
3509 /* Match exactly zero and one. */
3512 const0_operand (op, mode)
3514 enum machine_mode mode;
3516 return op == CONST0_RTX (mode);
3520 const1_operand (op, mode)
3522 enum machine_mode mode ATTRIBUTE_UNUSED;
3524 return op == const1_rtx;
3527 /* Match 2, 4, or 8. Used for leal multiplicands. */
3530 const248_operand (op, mode)
3532 enum machine_mode mode ATTRIBUTE_UNUSED;
3534 return (GET_CODE (op) == CONST_INT
3535 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3538 /* True if this is a constant appropriate for an increment or decrement. */
3541 incdec_operand (op, mode)
3543 enum machine_mode mode ATTRIBUTE_UNUSED;
3545 /* On Pentium4, the inc and dec operations causes extra dependency on flag
3546 registers, since carry flag is not set. */
3547 if (TARGET_PENTIUM4 && !optimize_size)
3549 return op == const1_rtx || op == constm1_rtx;
3552 /* Return nonzero if OP is acceptable as operand of DImode shift
3556 shiftdi_operand (op, mode)
3558 enum machine_mode mode ATTRIBUTE_UNUSED;
3561 return nonimmediate_operand (op, mode);
3563 return register_operand (op, mode);
3566 /* Return false if this is the stack pointer, or any other fake
3567 register eliminable to the stack pointer. Otherwise, this is
3570 This is used to prevent esp from being used as an index reg.
3571 Which would only happen in pathological cases. */
3574 reg_no_sp_operand (op, mode)
3576 enum machine_mode mode;
3579 if (GET_CODE (t) == SUBREG)
3581 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3584 return register_operand (op, mode);
3588 mmx_reg_operand (op, mode)
3590 enum machine_mode mode ATTRIBUTE_UNUSED;
3592 return MMX_REG_P (op);
3595 /* Return false if this is any eliminable register. Otherwise
3599 general_no_elim_operand (op, mode)
3601 enum machine_mode mode;
3604 if (GET_CODE (t) == SUBREG)
3606 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3607 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3608 || t == virtual_stack_dynamic_rtx)
3611 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3612 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3615 return general_operand (op, mode);
3618 /* Return false if this is any eliminable register. Otherwise
3619 register_operand or const_int. */
3622 nonmemory_no_elim_operand (op, mode)
3624 enum machine_mode mode;
3627 if (GET_CODE (t) == SUBREG)
3629 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3630 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3631 || t == virtual_stack_dynamic_rtx)
3634 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3637 /* Return false if this is any eliminable register or stack register,
3638 otherwise work like register_operand. */
3641 index_register_operand (op, mode)
3643 enum machine_mode mode;
3646 if (GET_CODE (t) == SUBREG)
3650 if (t == arg_pointer_rtx
3651 || t == frame_pointer_rtx
3652 || t == virtual_incoming_args_rtx
3653 || t == virtual_stack_vars_rtx
3654 || t == virtual_stack_dynamic_rtx
3655 || REGNO (t) == STACK_POINTER_REGNUM)
3658 return general_operand (op, mode);
3661 /* Return true if op is a Q_REGS class register. */
3664 q_regs_operand (op, mode)
3666 enum machine_mode mode;
3668 if (mode != VOIDmode && GET_MODE (op) != mode)
3670 if (GET_CODE (op) == SUBREG)
3671 op = SUBREG_REG (op);
3672 return ANY_QI_REG_P (op);
3675 /* Return true if op is an flags register. */
3678 flags_reg_operand (op, mode)
3680 enum machine_mode mode;
3682 if (mode != VOIDmode && GET_MODE (op) != mode)
3684 return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3687 /* Return true if op is a NON_Q_REGS class register. */
3690 non_q_regs_operand (op, mode)
3692 enum machine_mode mode;
3694 if (mode != VOIDmode && GET_MODE (op) != mode)
3696 if (GET_CODE (op) == SUBREG)
3697 op = SUBREG_REG (op);
3698 return NON_QI_REG_P (op);
3702 zero_extended_scalar_load_operand (op, mode)
3704 enum machine_mode mode ATTRIBUTE_UNUSED;
3707 if (GET_CODE (op) != MEM)
3709 op = maybe_get_pool_constant (op);
3712 if (GET_CODE (op) != CONST_VECTOR)
3715 (GET_MODE_SIZE (GET_MODE (op)) /
3716 GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
3717 for (n_elts--; n_elts > 0; n_elts--)
3719 rtx elt = CONST_VECTOR_ELT (op, n_elts);
3720 if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
3726 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3729 sse_comparison_operator (op, mode)
3731 enum machine_mode mode ATTRIBUTE_UNUSED;
3733 enum rtx_code code = GET_CODE (op);
3736 /* Operations supported directly. */
3746 /* These are equivalent to ones above in non-IEEE comparisons. */
3753 return !TARGET_IEEE_FP;
3758 /* Return 1 if OP is a valid comparison operator in valid mode. */
3760 ix86_comparison_operator (op, mode)
3762 enum machine_mode mode;
3764 enum machine_mode inmode;
3765 enum rtx_code code = GET_CODE (op);
3766 if (mode != VOIDmode && GET_MODE (op) != mode)
3768 if (GET_RTX_CLASS (code) != '<')
3770 inmode = GET_MODE (XEXP (op, 0));
3772 if (inmode == CCFPmode || inmode == CCFPUmode)
3774 enum rtx_code second_code, bypass_code;
3775 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3776 return (bypass_code == NIL && second_code == NIL);
3783 if (inmode == CCmode || inmode == CCGCmode
3784 || inmode == CCGOCmode || inmode == CCNOmode)
3787 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
3788 if (inmode == CCmode)
3792 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
3800 /* Return 1 if OP is a valid comparison operator testing carry flag
3803 ix86_carry_flag_operator (op, mode)
3805 enum machine_mode mode;
3807 enum machine_mode inmode;
3808 enum rtx_code code = GET_CODE (op);
3810 if (mode != VOIDmode && GET_MODE (op) != mode)
3812 if (GET_RTX_CLASS (code) != '<')
3814 inmode = GET_MODE (XEXP (op, 0));
3815 if (GET_CODE (XEXP (op, 0)) != REG
3816 || REGNO (XEXP (op, 0)) != 17
3817 || XEXP (op, 1) != const0_rtx)
3820 if (inmode == CCFPmode || inmode == CCFPUmode)
3822 enum rtx_code second_code, bypass_code;
3824 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3825 if (bypass_code != NIL || second_code != NIL)
3827 code = ix86_fp_compare_code_to_integer (code);
3829 else if (inmode != CCmode)
3834 /* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3837 fcmov_comparison_operator (op, mode)
3839 enum machine_mode mode;
3841 enum machine_mode inmode;
3842 enum rtx_code code = GET_CODE (op);
3844 if (mode != VOIDmode && GET_MODE (op) != mode)
3846 if (GET_RTX_CLASS (code) != '<')
3848 inmode = GET_MODE (XEXP (op, 0));
3849 if (inmode == CCFPmode || inmode == CCFPUmode)
3851 enum rtx_code second_code, bypass_code;
3853 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3854 if (bypass_code != NIL || second_code != NIL)
3856 code = ix86_fp_compare_code_to_integer (code);
3858 /* i387 supports just limited amount of conditional codes. */
3861 case LTU: case GTU: case LEU: case GEU:
3862 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
3865 case ORDERED: case UNORDERED:
3873 /* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3876 promotable_binary_operator (op, mode)
3878 enum machine_mode mode ATTRIBUTE_UNUSED;
3880 switch (GET_CODE (op))
3883 /* Modern CPUs have same latency for HImode and SImode multiply,
3884 but 386 and 486 do HImode multiply faster. */
3885 return ix86_cpu > PROCESSOR_I486;
3897 /* Nearly general operand, but accept any const_double, since we wish
3898 to be able to drop them into memory rather than have them get pulled
3902 cmp_fp_expander_operand (op, mode)
3904 enum machine_mode mode;
3906 if (mode != VOIDmode && mode != GET_MODE (op))
3908 if (GET_CODE (op) == CONST_DOUBLE)
3910 return general_operand (op, mode);
3913 /* Match an SI or HImode register for a zero_extract. */
3916 ext_register_operand (op, mode)
3918 enum machine_mode mode ATTRIBUTE_UNUSED;
3921 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3922 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
3925 if (!register_operand (op, VOIDmode))
3928 /* Be careful to accept only registers having upper parts. */
3929 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3930 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
3933 /* Return 1 if this is a valid binary floating-point operation.
3934 OP is the expression matched, and MODE is its mode. */
3937 binary_fp_operator (op, mode)
3939 enum machine_mode mode;
3941 if (mode != VOIDmode && mode != GET_MODE (op))
3944 switch (GET_CODE (op))
3950 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
3958 mult_operator (op, mode)
3960 enum machine_mode mode ATTRIBUTE_UNUSED;
3962 return GET_CODE (op) == MULT;
3966 div_operator (op, mode)
3968 enum machine_mode mode ATTRIBUTE_UNUSED;
3970 return GET_CODE (op) == DIV;
3974 arith_or_logical_operator (op, mode)
3976 enum machine_mode mode;
3978 return ((mode == VOIDmode || GET_MODE (op) == mode)
3979 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3980 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
3983 /* Returns 1 if OP is memory operand with a displacement. */
3986 memory_displacement_operand (op, mode)
3988 enum machine_mode mode;
3990 struct ix86_address parts;
3992 if (! memory_operand (op, mode))
3995 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3998 return parts.disp != NULL_RTX;
4001 /* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
4002 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4004 ??? It seems likely that this will only work because cmpsi is an
4005 expander, and no actual insns use this. */
4008 cmpsi_operand (op, mode)
4010 enum machine_mode mode;
4012 if (nonimmediate_operand (op, mode))
4015 if (GET_CODE (op) == AND
4016 && GET_MODE (op) == SImode
4017 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
4018 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
4019 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
4020 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
4021 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
4022 && GET_CODE (XEXP (op, 1)) == CONST_INT)
4028 /* Returns 1 if OP is memory operand that can not be represented by the
4032 long_memory_operand (op, mode)
4034 enum machine_mode mode;
4036 if (! memory_operand (op, mode))
4039 return memory_address_length (op) != 0;
4042 /* Return nonzero if the rtx is known aligned. */
4045 aligned_operand (op, mode)
4047 enum machine_mode mode;
4049 struct ix86_address parts;
4051 if (!general_operand (op, mode))
4054 /* Registers and immediate operands are always "aligned". */
4055 if (GET_CODE (op) != MEM)
4058 /* Don't even try to do any aligned optimizations with volatiles. */
4059 if (MEM_VOLATILE_P (op))
4064 /* Pushes and pops are only valid on the stack pointer. */
4065 if (GET_CODE (op) == PRE_DEC
4066 || GET_CODE (op) == POST_INC)
4069 /* Decode the address. */
4070 if (! ix86_decompose_address (op, &parts))
4073 if (parts.base && GET_CODE (parts.base) == SUBREG)
4074 parts.base = SUBREG_REG (parts.base);
4075 if (parts.index && GET_CODE (parts.index) == SUBREG)
4076 parts.index = SUBREG_REG (parts.index);
4078 /* Look for some component that isn't known to be aligned. */
4082 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
4087 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
4092 if (GET_CODE (parts.disp) != CONST_INT
4093 || (INTVAL (parts.disp) & 3) != 0)
4097 /* Didn't find one -- this must be an aligned address. */
4101 /* Return true if the constant is something that can be loaded with
4102 a special instruction. Only handle 0.0 and 1.0; others are less
4106 standard_80387_constant_p (x)
4109 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4111 /* Note that on the 80387, other constants, such as pi, that we should support
4112 too. On some machines, these are much slower to load as standard constant,
4113 than to load from doubles in memory. */
4114 if (x == CONST0_RTX (GET_MODE (x)))
4116 if (x == CONST1_RTX (GET_MODE (x)))
4121 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
4124 standard_sse_constant_p (x)
4127 if (x == const0_rtx)
4129 return (x == CONST0_RTX (GET_MODE (x)));
4132 /* Returns 1 if OP contains a symbol reference */
4135 symbolic_reference_mentioned_p (op)
4138 register const char *fmt;
4141 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4144 fmt = GET_RTX_FORMAT (GET_CODE (op));
4145 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4151 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4152 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4156 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4163 /* Return 1 if it is appropriate to emit `ret' instructions in the
4164 body of a function. Do this only if the epilogue is simple, needing a
4165 couple of insns. Prior to reloading, we can't tell how many registers
4166 must be saved, so return 0 then. Return 0 if there is no frame
4167 marker to de-allocate.
4169 If NON_SAVING_SETJMP is defined and true, then it is not possible
4170 for the epilogue to be simple, so return 0. This is a special case
4171 since NON_SAVING_SETJMP will not cause regs_ever_live to change
4172 until final, but jump_optimize may need to know sooner if a
4176 ix86_can_use_return_insn_p ()
4178 struct ix86_frame frame;
4180 #ifdef NON_SAVING_SETJMP
4181 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4185 if (! reload_completed || frame_pointer_needed)
4188 /* Don't allow more than 32 pop, since that's all we can do
4189 with one instruction. */
4190 if (current_function_pops_args
4191 && current_function_args_size >= 32768)
4194 ix86_compute_frame_layout (&frame);
4195 return frame.to_allocate == 0 && frame.nregs == 0;
4198 /* Return 1 if VALUE can be stored in the sign extended immediate field. */
4200 x86_64_sign_extended_value (value)
4203 switch (GET_CODE (value))
4205 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
4206 to be at least 32 and this all acceptable constants are
4207 represented as CONST_INT. */
4209 if (HOST_BITS_PER_WIDE_INT == 32)
4213 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4214 return trunc_int_for_mode (val, SImode) == val;
4218 /* For certain code models, the symbolic references are known to fit.
4219 in CM_SMALL_PIC model we know it fits if it is local to the shared
4220 library. Don't count TLS SYMBOL_REFs here, since they should fit
4221 only if inside of UNSPEC handled below. */
4223 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4225 /* For certain code models, the code is near as well. */
4227 return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4228 || ix86_cmodel == CM_KERNEL);
4230 /* We also may accept the offsetted memory references in certain special
4233 if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4234 switch (XINT (XEXP (value, 0), 1))
4236 case UNSPEC_GOTPCREL:
4238 case UNSPEC_GOTNTPOFF:
4244 if (GET_CODE (XEXP (value, 0)) == PLUS)
4246 rtx op1 = XEXP (XEXP (value, 0), 0);
4247 rtx op2 = XEXP (XEXP (value, 0), 1);
4248 HOST_WIDE_INT offset;
4250 if (ix86_cmodel == CM_LARGE)
4252 if (GET_CODE (op2) != CONST_INT)
4254 offset = trunc_int_for_mode (INTVAL (op2), DImode);
4255 switch (GET_CODE (op1))
4258 /* For CM_SMALL assume that latest object is 16MB before
4259 end of 31bits boundary. We may also accept pretty
4260 large negative constants knowing that all objects are
4261 in the positive half of address space. */
4262 if (ix86_cmodel == CM_SMALL
4263 && offset < 16*1024*1024
4264 && trunc_int_for_mode (offset, SImode) == offset)
4266 /* For CM_KERNEL we know that all object resist in the
4267 negative half of 32bits address space. We may not
4268 accept negative offsets, since they may be just off
4269 and we may accept pretty large positive ones. */
4270 if (ix86_cmodel == CM_KERNEL
4272 && trunc_int_for_mode (offset, SImode) == offset)
4276 /* These conditions are similar to SYMBOL_REF ones, just the
4277 constraints for code models differ. */
4278 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4279 && offset < 16*1024*1024
4280 && trunc_int_for_mode (offset, SImode) == offset)
4282 if (ix86_cmodel == CM_KERNEL
4284 && trunc_int_for_mode (offset, SImode) == offset)
4288 switch (XINT (op1, 1))
4293 && trunc_int_for_mode (offset, SImode) == offset)
4307 /* Return 1 if VALUE can be stored in the zero extended immediate field. */
4309 x86_64_zero_extended_value (value)
4312 switch (GET_CODE (value))
4315 if (HOST_BITS_PER_WIDE_INT == 32)
4316 return (GET_MODE (value) == VOIDmode
4317 && !CONST_DOUBLE_HIGH (value));
4321 if (HOST_BITS_PER_WIDE_INT == 32)
4322 return INTVAL (value) >= 0;
4324 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4327 /* For certain code models, the symbolic references are known to fit. */
4329 return ix86_cmodel == CM_SMALL;
4331 /* For certain code models, the code is near as well. */
4333 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4335 /* We also may accept the offsetted memory references in certain special
4338 if (GET_CODE (XEXP (value, 0)) == PLUS)
4340 rtx op1 = XEXP (XEXP (value, 0), 0);
4341 rtx op2 = XEXP (XEXP (value, 0), 1);
4343 if (ix86_cmodel == CM_LARGE)
4345 switch (GET_CODE (op1))
4349 /* For small code model we may accept pretty large positive
4350 offsets, since one bit is available for free. Negative
4351 offsets are limited by the size of NULL pointer area
4352 specified by the ABI. */
4353 if (ix86_cmodel == CM_SMALL
4354 && GET_CODE (op2) == CONST_INT
4355 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4356 && (trunc_int_for_mode (INTVAL (op2), SImode)
4359 /* ??? For the kernel, we may accept adjustment of
4360 -0x10000000, since we know that it will just convert
4361 negative address space to positive, but perhaps this
4362 is not worthwhile. */
4365 /* These conditions are similar to SYMBOL_REF ones, just the
4366 constraints for code models differ. */
4367 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4368 && GET_CODE (op2) == CONST_INT
4369 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4370 && (trunc_int_for_mode (INTVAL (op2), SImode)
4384 /* Value should be nonzero if functions must have frame pointers.
4385 Zero means the frame pointer need not be set up (and parms may
4386 be accessed via the stack pointer) in functions that seem suitable. */
4389 ix86_frame_pointer_required ()
4391 /* If we accessed previous frames, then the generated code expects
4392 to be able to access the saved ebp value in our frame. */
4393 if (cfun->machine->accesses_prev_frame)
4396 /* Several x86 os'es need a frame pointer for other reasons,
4397 usually pertaining to setjmp. */
4398 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4401 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4402 the frame pointer by default. Turn it back on now if we've not
4403 got a leaf function. */
4404 if (TARGET_OMIT_LEAF_FRAME_POINTER
4405 && (!current_function_is_leaf))
4408 if (current_function_profile)
4414 /* Record that the current function accesses previous call frames. */
4417 ix86_setup_frame_addresses ()
4419 cfun->machine->accesses_prev_frame = 1;
4422 #if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4423 # define USE_HIDDEN_LINKONCE 1
4425 # define USE_HIDDEN_LINKONCE 0
4428 static int pic_labels_used;
4430 /* Fills in the label name that should be used for a pc thunk for
4431 the given register. */
4434 get_pc_thunk_name (name, regno)
4438 if (USE_HIDDEN_LINKONCE)
4439 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4441 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4445 /* This function generates code for -fpic that loads %ebx with
4446 the return address of the caller and then returns. */
4449 ix86_asm_file_end (file)
4455 for (regno = 0; regno < 8; ++regno)
4459 if (! ((pic_labels_used >> regno) & 1))
4462 get_pc_thunk_name (name, regno);
4464 if (USE_HIDDEN_LINKONCE)
4468 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4470 TREE_PUBLIC (decl) = 1;
4471 TREE_STATIC (decl) = 1;
4472 DECL_ONE_ONLY (decl) = 1;
4474 (*targetm.asm_out.unique_section) (decl, 0);
4475 named_section (decl, NULL, 0);
4477 (*targetm.asm_out.globalize_label) (file, name);
4478 fputs ("\t.hidden\t", file);
4479 assemble_name (file, name);
4481 ASM_DECLARE_FUNCTION_NAME (file, name, decl);
4486 ASM_OUTPUT_LABEL (file, name);
4489 xops[0] = gen_rtx_REG (SImode, regno);
4490 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4491 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4492 output_asm_insn ("ret", xops);
4496 /* Emit code for the SET_GOT patterns. */
4499 output_set_got (dest)
4505 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4507 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4509 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4512 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4514 output_asm_insn ("call\t%a2", xops);
4517 /* Output the "canonical" label name ("Lxx$pb") here too. This
4518 is what will be referred to by the Mach-O PIC subsystem. */
4519 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4521 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4522 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4525 output_asm_insn ("pop{l}\t%0", xops);
4530 get_pc_thunk_name (name, REGNO (dest));
4531 pic_labels_used |= 1 << REGNO (dest);
4533 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4534 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4535 output_asm_insn ("call\t%X2", xops);
4538 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4539 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4540 else if (!TARGET_MACHO)
4541 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4546 /* Generate an "push" pattern for input ARG. */
4552 return gen_rtx_SET (VOIDmode,
4554 gen_rtx_PRE_DEC (Pmode,
4555 stack_pointer_rtx)),
4559 /* Return >= 0 if there is an unused call-clobbered register available
4560 for the entire function. */
4563 ix86_select_alt_pic_regnum ()
4565 if (current_function_is_leaf && !current_function_profile)
4568 for (i = 2; i >= 0; --i)
4569 if (!regs_ever_live[i])
4573 return INVALID_REGNUM;
4576 /* Return 1 if we need to save REGNO. */
4578 ix86_save_reg (regno, maybe_eh_return)
4580 int maybe_eh_return;
4582 if (pic_offset_table_rtx
4583 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4584 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4585 || current_function_profile
4586 || current_function_calls_eh_return))
4588 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4593 if (current_function_calls_eh_return && maybe_eh_return)
4598 unsigned test = EH_RETURN_DATA_REGNO (i);
4599 if (test == INVALID_REGNUM)
4606 return (regs_ever_live[regno]
4607 && !call_used_regs[regno]
4608 && !fixed_regs[regno]
4609 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4612 /* Return number of registers to be saved on the stack. */
4620 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4621 if (ix86_save_reg (regno, true))
4626 /* Return the offset between two registers, one to be eliminated, and the other
4627 its replacement, at the start of a routine. */
4630 ix86_initial_elimination_offset (from, to)
4634 struct ix86_frame frame;
4635 ix86_compute_frame_layout (&frame);
4637 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4638 return frame.hard_frame_pointer_offset;
4639 else if (from == FRAME_POINTER_REGNUM
4640 && to == HARD_FRAME_POINTER_REGNUM)
4641 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4644 if (to != STACK_POINTER_REGNUM)
4646 else if (from == ARG_POINTER_REGNUM)
4647 return frame.stack_pointer_offset;
4648 else if (from != FRAME_POINTER_REGNUM)
4651 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4655 /* Fill structure ix86_frame about frame of currently computed function. */
4658 ix86_compute_frame_layout (frame)
4659 struct ix86_frame *frame;
4661 HOST_WIDE_INT total_size;
4662 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4664 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4665 HOST_WIDE_INT size = get_frame_size ();
4667 frame->nregs = ix86_nsaved_regs ();
4670 /* Skip return address and saved base pointer. */
4671 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4673 frame->hard_frame_pointer_offset = offset;
4675 /* Do some sanity checking of stack_alignment_needed and
4676 preferred_alignment, since i386 port is the only using those features
4677 that may break easily. */
4679 if (size && !stack_alignment_needed)
4681 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4683 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4685 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4688 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4689 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4691 /* Register save area */
4692 offset += frame->nregs * UNITS_PER_WORD;
4695 if (ix86_save_varrargs_registers)
4697 offset += X86_64_VARARGS_SIZE;
4698 frame->va_arg_size = X86_64_VARARGS_SIZE;
4701 frame->va_arg_size = 0;
4703 /* Align start of frame for local function. */
4704 frame->padding1 = ((offset + stack_alignment_needed - 1)
4705 & -stack_alignment_needed) - offset;
4707 offset += frame->padding1;
4709 /* Frame pointer points here. */
4710 frame->frame_pointer_offset = offset;
4714 /* Add outgoing arguments area. Can be skipped if we eliminated
4715 all the function calls as dead code. */
4716 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4718 offset += current_function_outgoing_args_size;
4719 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4722 frame->outgoing_arguments_size = 0;
4724 /* Align stack boundary. Only needed if we're calling another function
4726 if (!current_function_is_leaf || current_function_calls_alloca)
4727 frame->padding2 = ((offset + preferred_alignment - 1)
4728 & -preferred_alignment) - offset;
4730 frame->padding2 = 0;
4732 offset += frame->padding2;
4734 /* We've reached end of stack frame. */
4735 frame->stack_pointer_offset = offset;
4737 /* Size prologue needs to allocate. */
4738 frame->to_allocate =
4739 (size + frame->padding1 + frame->padding2
4740 + frame->outgoing_arguments_size + frame->va_arg_size);
4742 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4743 && current_function_is_leaf)
4745 frame->red_zone_size = frame->to_allocate;
4746 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4747 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4750 frame->red_zone_size = 0;
4751 frame->to_allocate -= frame->red_zone_size;
4752 frame->stack_pointer_offset -= frame->red_zone_size;
4754 fprintf (stderr, "nregs: %i\n", frame->nregs);
4755 fprintf (stderr, "size: %i\n", size);
4756 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4757 fprintf (stderr, "padding1: %i\n", frame->padding1);
4758 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4759 fprintf (stderr, "padding2: %i\n", frame->padding2);
4760 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4761 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4762 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4763 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4764 frame->hard_frame_pointer_offset);
4765 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4769 /* Emit code to save registers in the prologue. */
4772 ix86_emit_save_regs ()
4777 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4778 if (ix86_save_reg (regno, true))
4780 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4781 RTX_FRAME_RELATED_P (insn) = 1;
4785 /* Emit code to save registers using MOV insns. First register
4786 is restored from POINTER + OFFSET. */
4788 ix86_emit_save_regs_using_mov (pointer, offset)
4790 HOST_WIDE_INT offset;
4795 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4796 if (ix86_save_reg (regno, true))
4798 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4800 gen_rtx_REG (Pmode, regno));
4801 RTX_FRAME_RELATED_P (insn) = 1;
4802 offset += UNITS_PER_WORD;
4806 /* Expand the prologue into a bunch of separate insns. */
4809 ix86_expand_prologue ()
4813 struct ix86_frame frame;
4815 HOST_WIDE_INT allocate;
4817 ix86_compute_frame_layout (&frame);
4820 int count = frame.nregs;
4822 /* The fast prologue uses move instead of push to save registers. This
4823 is significantly longer, but also executes faster as modern hardware
4824 can execute the moves in parallel, but can't do that for push/pop.
4826 Be careful about choosing what prologue to emit: When function takes
4827 many instructions to execute we may use slow version as well as in
4828 case function is known to be outside hot spot (this is known with
4829 feedback only). Weight the size of function by number of registers
4830 to save as it is cheap to use one or two push instructions but very
4831 slow to use many of them. */
4833 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4834 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4835 || (flag_branch_probabilities
4836 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4837 use_fast_prologue_epilogue = 0;
4839 use_fast_prologue_epilogue = !expensive_function_p (count);
4840 if (TARGET_PROLOGUE_USING_MOVE)
4841 use_mov = use_fast_prologue_epilogue;
4844 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4845 slower on all targets. Also sdb doesn't like it. */
4847 if (frame_pointer_needed)
4849 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4850 RTX_FRAME_RELATED_P (insn) = 1;
4852 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4853 RTX_FRAME_RELATED_P (insn) = 1;
4856 allocate = frame.to_allocate;
4857 /* In case we are dealing only with single register and empty frame,
4858 push is equivalent of the mov+add sequence. */
4859 if (allocate == 0 && frame.nregs <= 1)
4863 ix86_emit_save_regs ();
4865 allocate += frame.nregs * UNITS_PER_WORD;
4869 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4871 insn = emit_insn (gen_pro_epilogue_adjust_stack
4872 (stack_pointer_rtx, stack_pointer_rtx,
4873 GEN_INT (-allocate)));
4874 RTX_FRAME_RELATED_P (insn) = 1;
4878 /* ??? Is this only valid for Win32? */
4885 arg0 = gen_rtx_REG (SImode, 0);
4886 emit_move_insn (arg0, GEN_INT (allocate));
4888 sym = gen_rtx_MEM (FUNCTION_MODE,
4889 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
4890 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
4892 CALL_INSN_FUNCTION_USAGE (insn)
4893 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4894 CALL_INSN_FUNCTION_USAGE (insn));
4896 /* Don't allow scheduling pass to move insns across __alloca
4898 emit_insn (gen_blockage (const0_rtx));
4902 if (!frame_pointer_needed || !frame.to_allocate)
4903 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4905 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4906 -frame.nregs * UNITS_PER_WORD);
4909 #ifdef SUBTARGET_PROLOGUE
4913 pic_reg_used = false;
4914 if (pic_offset_table_rtx
4915 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4916 || current_function_profile))
4918 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4920 if (alt_pic_reg_used != INVALID_REGNUM)
4921 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4923 pic_reg_used = true;
4928 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4930 /* Even with accurate pre-reload life analysis, we can wind up
4931 deleting all references to the pic register after reload.
4932 Consider if cross-jumping unifies two sides of a branch
4933 controlled by a comparison vs the only read from a global.
4934 In which case, allow the set_got to be deleted, though we're
4935 too late to do anything about the ebx save in the prologue. */
4936 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4939 /* Prevent function calls from be scheduled before the call to mcount.
4940 In the pic_reg_used case, make sure that the got load isn't deleted. */
4941 if (current_function_profile)
4942 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
4945 /* Emit code to restore saved registers using MOV insns. First register
4946 is restored from POINTER + OFFSET. */
4948 ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4951 int maybe_eh_return;
4955 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4956 if (ix86_save_reg (regno, maybe_eh_return))
4958 emit_move_insn (gen_rtx_REG (Pmode, regno),
4959 adjust_address (gen_rtx_MEM (Pmode, pointer),
4961 offset += UNITS_PER_WORD;
4965 /* Restore function stack, frame, and registers. */
4968 ix86_expand_epilogue (style)
4972 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4973 struct ix86_frame frame;
4974 HOST_WIDE_INT offset;
4976 ix86_compute_frame_layout (&frame);
4978 /* Calculate start of saved registers relative to ebp. Special care
4979 must be taken for the normal return case of a function using
4980 eh_return: the eax and edx registers are marked as saved, but not
4981 restored along this path. */
4982 offset = frame.nregs;
4983 if (current_function_calls_eh_return && style != 2)
4985 offset *= -UNITS_PER_WORD;
4987 /* If we're only restoring one register and sp is not valid then
4988 using a move instruction to restore the register since it's
4989 less work than reloading sp and popping the register.
4991 The default code result in stack adjustment using add/lea instruction,
4992 while this code results in LEAVE instruction (or discrete equivalent),
4993 so it is profitable in some other cases as well. Especially when there
4994 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4995 and there is exactly one register to pop. This heuristic may need some
4996 tuning in future. */
4997 if ((!sp_valid && frame.nregs <= 1)
4998 || (TARGET_EPILOGUE_USING_MOVE
4999 && use_fast_prologue_epilogue
5000 && (frame.nregs > 1 || frame.to_allocate))
5001 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5002 || (frame_pointer_needed && TARGET_USE_LEAVE
5003 && use_fast_prologue_epilogue && frame.nregs == 1)
5004 || current_function_calls_eh_return)
5006 /* Restore registers. We can use ebp or esp to address the memory
5007 locations. If both are available, default to ebp, since offsets
5008 are known to be small. Only exception is esp pointing directly to the
5009 end of block of saved registers, where we may simplify addressing
5012 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5013 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5014 frame.to_allocate, style == 2);
5016 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5017 offset, style == 2);
5019 /* eh_return epilogues need %ecx added to the stack pointer. */
5022 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5024 if (frame_pointer_needed)
5026 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5027 tmp = plus_constant (tmp, UNITS_PER_WORD);
5028 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5030 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5031 emit_move_insn (hard_frame_pointer_rtx, tmp);
5033 emit_insn (gen_pro_epilogue_adjust_stack
5034 (stack_pointer_rtx, sa, const0_rtx));
5038 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5039 tmp = plus_constant (tmp, (frame.to_allocate
5040 + frame.nregs * UNITS_PER_WORD));
5041 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5044 else if (!frame_pointer_needed)
5045 emit_insn (gen_pro_epilogue_adjust_stack
5046 (stack_pointer_rtx, stack_pointer_rtx,
5047 GEN_INT (frame.to_allocate
5048 + frame.nregs * UNITS_PER_WORD)));
5049 /* If not an i386, mov & pop is faster than "leave". */
5050 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
5051 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5054 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
5055 hard_frame_pointer_rtx,
5058 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5060 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5065 /* First step is to deallocate the stack frame so that we can
5066 pop the registers. */
5069 if (!frame_pointer_needed)
5071 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
5072 hard_frame_pointer_rtx,
5075 else if (frame.to_allocate)
5076 emit_insn (gen_pro_epilogue_adjust_stack
5077 (stack_pointer_rtx, stack_pointer_rtx,
5078 GEN_INT (frame.to_allocate)));
5080 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5081 if (ix86_save_reg (regno, false))
5084 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5086 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5088 if (frame_pointer_needed)
5090 /* Leave results in shorter dependency chains on CPUs that are
5091 able to grok it fast. */
5092 if (TARGET_USE_LEAVE)
5093 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5094 else if (TARGET_64BIT)
5095 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5097 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5101 /* Sibcall epilogues don't want a return instruction. */
5105 if (current_function_pops_args && current_function_args_size)
5107 rtx popc = GEN_INT (current_function_pops_args);
5109 /* i386 can only pop 64K bytes. If asked to pop more, pop
5110 return address, do explicit add, and jump indirectly to the
5113 if (current_function_pops_args >= 65536)
5115 rtx ecx = gen_rtx_REG (SImode, 2);
5117 /* There are is no "pascal" calling convention in 64bit ABI. */
5121 emit_insn (gen_popsi1 (ecx));
5122 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5123 emit_jump_insn (gen_return_indirect_internal (ecx));
5126 emit_jump_insn (gen_return_pop_internal (popc));
5129 emit_jump_insn (gen_return_internal ());
5132 /* Reset from the function's potential modifications. */
5135 ix86_output_function_epilogue (file, size)
5136 FILE *file ATTRIBUTE_UNUSED;
5137 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
5139 if (pic_offset_table_rtx)
5140 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5143 /* Extract the parts of an RTL expression that is a valid memory address
5144 for an instruction. Return 0 if the structure of the address is
5145 grossly off. Return -1 if the address contains ASHIFT, so it is not
5146 strictly valid, but still used for computing length of lea instruction.
5150 ix86_decompose_address (addr, out)
5152 struct ix86_address *out;
5154 rtx base = NULL_RTX;
5155 rtx index = NULL_RTX;
5156 rtx disp = NULL_RTX;
5157 HOST_WIDE_INT scale = 1;
5158 rtx scale_rtx = NULL_RTX;
5161 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
5163 else if (GET_CODE (addr) == PLUS)
5165 rtx op0 = XEXP (addr, 0);
5166 rtx op1 = XEXP (addr, 1);
5167 enum rtx_code code0 = GET_CODE (op0);
5168 enum rtx_code code1 = GET_CODE (op1);
5170 if (code0 == REG || code0 == SUBREG)
5172 if (code1 == REG || code1 == SUBREG)
5173 index = op0, base = op1; /* index + base */
5175 base = op0, disp = op1; /* base + displacement */
5177 else if (code0 == MULT)
5179 index = XEXP (op0, 0);
5180 scale_rtx = XEXP (op0, 1);
5181 if (code1 == REG || code1 == SUBREG)
5182 base = op1; /* index*scale + base */
5184 disp = op1; /* index*scale + disp */
5186 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
5188 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
5189 scale_rtx = XEXP (XEXP (op0, 0), 1);
5190 base = XEXP (op0, 1);
5193 else if (code0 == PLUS)
5195 index = XEXP (op0, 0); /* index + base + disp */
5196 base = XEXP (op0, 1);
5202 else if (GET_CODE (addr) == MULT)
5204 index = XEXP (addr, 0); /* index*scale */
5205 scale_rtx = XEXP (addr, 1);
5207 else if (GET_CODE (addr) == ASHIFT)
5211 /* We're called for lea too, which implements ashift on occasion. */
5212 index = XEXP (addr, 0);
5213 tmp = XEXP (addr, 1);
5214 if (GET_CODE (tmp) != CONST_INT)
5216 scale = INTVAL (tmp);
5217 if ((unsigned HOST_WIDE_INT) scale > 3)
5223 disp = addr; /* displacement */
5225 /* Extract the integral value of scale. */
5228 if (GET_CODE (scale_rtx) != CONST_INT)
5230 scale = INTVAL (scale_rtx);
5233 /* Allow arg pointer and stack pointer as index if there is not scaling */
5234 if (base && index && scale == 1
5235 && (index == arg_pointer_rtx || index == frame_pointer_rtx
5236 || index == stack_pointer_rtx))
5243 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5244 if ((base == hard_frame_pointer_rtx
5245 || base == frame_pointer_rtx
5246 || base == arg_pointer_rtx) && !disp)
5249 /* Special case: on K6, [%esi] makes the instruction vector decoded.
5250 Avoid this by transforming to [%esi+0]. */
5251 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
5252 && base && !index && !disp
5254 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5257 /* Special case: encode reg+reg instead of reg*2. */
5258 if (!base && index && scale && scale == 2)
5259 base = index, scale = 1;
5261 /* Special case: scaling cannot be encoded without base or displacement. */
5262 if (!base && !disp && index && scale != 1)
5273 /* Return cost of the memory address x.
5274 For i386, it is better to use a complex address than let gcc copy
5275 the address into a reg and make a new pseudo. But not if the address
5276 requires to two regs - that would mean more pseudos with longer
5279 ix86_address_cost (x)
5282 struct ix86_address parts;
5285 if (!ix86_decompose_address (x, &parts))
5288 if (parts.base && GET_CODE (parts.base) == SUBREG)
5289 parts.base = SUBREG_REG (parts.base);
5290 if (parts.index && GET_CODE (parts.index) == SUBREG)
5291 parts.index = SUBREG_REG (parts.index);
5293 /* More complex memory references are better. */
5294 if (parts.disp && parts.disp != const0_rtx)
5297 /* Attempt to minimize number of registers in the address. */
5299 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5301 && (!REG_P (parts.index)
5302 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5306 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5308 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5309 && parts.base != parts.index)
5312 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
5313 since it's predecode logic can't detect the length of instructions
5314 and it degenerates to vector decoded. Increase cost of such
5315 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
5316 to split such addresses or even refuse such addresses at all.
5318 Following addressing modes are affected:
5323 The first and last case may be avoidable by explicitly coding the zero in
5324 memory address, but I don't have AMD-K6 machine handy to check this
5328 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5329 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5330 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5336 /* If X is a machine specific address (i.e. a symbol or label being
5337 referenced as a displacement from the GOT implemented using an
5338 UNSPEC), then return the base term. Otherwise return X. */
5341 ix86_find_base_term (x)
5348 if (GET_CODE (x) != CONST)
5351 if (GET_CODE (term) == PLUS
5352 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5353 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5354 term = XEXP (term, 0);
5355 if (GET_CODE (term) != UNSPEC
5356 || XINT (term, 1) != UNSPEC_GOTPCREL)
5359 term = XVECEXP (term, 0, 0);
5361 if (GET_CODE (term) != SYMBOL_REF
5362 && GET_CODE (term) != LABEL_REF)
5368 if (GET_CODE (x) != PLUS
5369 || XEXP (x, 0) != pic_offset_table_rtx
5370 || GET_CODE (XEXP (x, 1)) != CONST)
5373 term = XEXP (XEXP (x, 1), 0);
5375 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
5376 term = XEXP (term, 0);
5378 if (GET_CODE (term) != UNSPEC
5379 || XINT (term, 1) != UNSPEC_GOTOFF)
5382 term = XVECEXP (term, 0, 0);
5384 if (GET_CODE (term) != SYMBOL_REF
5385 && GET_CODE (term) != LABEL_REF)
5391 /* Determine if a given RTX is a valid constant. We already know this
5392 satisfies CONSTANT_P. */
5395 legitimate_constant_p (x)
5400 switch (GET_CODE (x))
5403 /* TLS symbols are not constant. */
5404 if (tls_symbolic_operand (x, Pmode))
5409 inner = XEXP (x, 0);
5411 /* Offsets of TLS symbols are never valid.
5412 Discourage CSE from creating them. */
5413 if (GET_CODE (inner) == PLUS
5414 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
5417 /* Only some unspecs are valid as "constants". */
5418 if (GET_CODE (inner) == UNSPEC)
5419 switch (XINT (inner, 1))
5422 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5432 /* Otherwise we handle everything else in the move patterns. */
5436 /* Determine if it's legal to put X into the constant pool. This
5437 is not possible for the address of thread-local symbols, which
5438 is checked above. */
5441 ix86_cannot_force_const_mem (x)
5444 return !legitimate_constant_p (x);
5447 /* Determine if a given RTX is a valid constant address. */
5450 constant_address_p (x)
5453 switch (GET_CODE (x))
5460 return TARGET_64BIT;
5463 /* For Mach-O, really believe the CONST. */
5466 /* Otherwise fall through. */
5468 return !flag_pic && legitimate_constant_p (x);
5475 /* Nonzero if the constant value X is a legitimate general operand
5476 when generating PIC code. It is given that flag_pic is on and
5477 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5480 legitimate_pic_operand_p (x)
5485 switch (GET_CODE (x))
5488 inner = XEXP (x, 0);
5490 /* Only some unspecs are valid as "constants". */
5491 if (GET_CODE (inner) == UNSPEC)
5492 switch (XINT (inner, 1))
5495 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5503 return legitimate_pic_address_disp_p (x);
5510 /* Determine if a given CONST RTX is a valid memory displacement
5514 legitimate_pic_address_disp_p (disp)
5519 /* In 64bit mode we can allow direct addresses of symbols and labels
5520 when they are not dynamic symbols. */
5523 /* TLS references should always be enclosed in UNSPEC. */
5524 if (tls_symbolic_operand (disp, GET_MODE (disp)))
5526 if (GET_CODE (disp) == SYMBOL_REF
5527 && ix86_cmodel == CM_SMALL_PIC
5528 && (CONSTANT_POOL_ADDRESS_P (disp)
5529 || SYMBOL_REF_FLAG (disp)))
5531 if (GET_CODE (disp) == LABEL_REF)
5533 if (GET_CODE (disp) == CONST
5534 && GET_CODE (XEXP (disp, 0)) == PLUS
5535 && ((GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
5536 && ix86_cmodel == CM_SMALL_PIC
5537 && (CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (disp, 0), 0))
5538 || SYMBOL_REF_FLAG (XEXP (XEXP (disp, 0), 0))))
5539 || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
5540 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT
5541 && INTVAL (XEXP (XEXP (disp, 0), 1)) < 16*1024*1024
5542 && INTVAL (XEXP (XEXP (disp, 0), 1)) >= -16*1024*1024)
5545 if (GET_CODE (disp) != CONST)
5547 disp = XEXP (disp, 0);
5551 /* We are unsafe to allow PLUS expressions. This limit allowed distance
5552 of GOT tables. We should not need these anyway. */
5553 if (GET_CODE (disp) != UNSPEC
5554 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5557 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5558 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5564 if (GET_CODE (disp) == PLUS)
5566 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5568 disp = XEXP (disp, 0);
5572 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5573 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5575 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5576 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5577 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5579 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5580 if (strstr (sym_name, "$pb") != 0)
5585 if (GET_CODE (disp) != UNSPEC)
5588 switch (XINT (disp, 1))
5593 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5595 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5596 case UNSPEC_GOTTPOFF:
5597 case UNSPEC_GOTNTPOFF:
5598 case UNSPEC_INDNTPOFF:
5601 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5603 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5605 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5611 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5612 memory address for an instruction. The MODE argument is the machine mode
5613 for the MEM expression that wants to use this address.
5615 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
5616 convert common non-canonical forms to canonical form so that they will
5620 legitimate_address_p (mode, addr, strict)
5621 enum machine_mode mode;
5625 struct ix86_address parts;
5626 rtx base, index, disp;
5627 HOST_WIDE_INT scale;
5628 const char *reason = NULL;
5629 rtx reason_rtx = NULL_RTX;
5631 if (TARGET_DEBUG_ADDR)
5634 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5635 GET_MODE_NAME (mode), strict);
5639 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
5641 if (TARGET_DEBUG_ADDR)
5642 fprintf (stderr, "Success.\n");
5646 if (ix86_decompose_address (addr, &parts) <= 0)
5648 reason = "decomposition failed";
5653 index = parts.index;
5655 scale = parts.scale;
5657 /* Validate base register.
5659 Don't allow SUBREG's here, it can lead to spill failures when the base
5660 is one word out of a two word structure, which is represented internally
5668 if (GET_CODE (base) == SUBREG)
5669 reg = SUBREG_REG (base);
5673 if (GET_CODE (reg) != REG)
5675 reason = "base is not a register";
5679 if (GET_MODE (base) != Pmode)
5681 reason = "base is not in Pmode";
5685 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5686 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5688 reason = "base is not valid";
5693 /* Validate index register.
5695 Don't allow SUBREG's here, it can lead to spill failures when the index
5696 is one word out of a two word structure, which is represented internally
5704 if (GET_CODE (index) == SUBREG)
5705 reg = SUBREG_REG (index);
5709 if (GET_CODE (reg) != REG)
5711 reason = "index is not a register";
5715 if (GET_MODE (index) != Pmode)
5717 reason = "index is not in Pmode";
5721 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5722 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5724 reason = "index is not valid";
5729 /* Validate scale factor. */
5732 reason_rtx = GEN_INT (scale);
5735 reason = "scale without index";
5739 if (scale != 2 && scale != 4 && scale != 8)
5741 reason = "scale is not a valid multiplier";
5746 /* Validate displacement. */
5751 if (GET_CODE (disp) == CONST
5752 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5753 switch (XINT (XEXP (disp, 0), 1))
5757 case UNSPEC_GOTPCREL:
5760 goto is_legitimate_pic;
5762 case UNSPEC_GOTTPOFF:
5763 case UNSPEC_GOTNTPOFF:
5764 case UNSPEC_INDNTPOFF:
5770 reason = "invalid address unspec";
5774 else if (flag_pic && (SYMBOLIC_CONST (disp)
5776 && !machopic_operand_p (disp)
5781 if (TARGET_64BIT && (index || base))
5783 /* foo@dtpoff(%rX) is ok. */
5784 if (GET_CODE (disp) != CONST
5785 || GET_CODE (XEXP (disp, 0)) != PLUS
5786 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5787 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5788 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5789 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5791 reason = "non-constant pic memory reference";
5795 else if (! legitimate_pic_address_disp_p (disp))
5797 reason = "displacement is an invalid pic construct";
5801 /* This code used to verify that a symbolic pic displacement
5802 includes the pic_offset_table_rtx register.
5804 While this is good idea, unfortunately these constructs may
5805 be created by "adds using lea" optimization for incorrect
5814 This code is nonsensical, but results in addressing
5815 GOT table with pic_offset_table_rtx base. We can't
5816 just refuse it easily, since it gets matched by
5817 "addsi3" pattern, that later gets split to lea in the
5818 case output register differs from input. While this
5819 can be handled by separate addsi pattern for this case
5820 that never results in lea, this seems to be easier and
5821 correct fix for crash to disable this test. */
5823 else if (!CONSTANT_ADDRESS_P (disp))
5825 reason = "displacement is not constant";
5828 else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
5830 reason = "displacement is out of range";
5833 else if (!TARGET_64BIT && GET_CODE (disp) == CONST_DOUBLE)
5835 reason = "displacement is a const_double";
5840 /* Everything looks valid. */
5841 if (TARGET_DEBUG_ADDR)
5842 fprintf (stderr, "Success.\n");
5846 if (TARGET_DEBUG_ADDR)
5848 fprintf (stderr, "Error: %s\n", reason);
5849 debug_rtx (reason_rtx);
5854 /* Return an unique alias set for the GOT. */
5856 static HOST_WIDE_INT
5857 ix86_GOT_alias_set ()
5859 static HOST_WIDE_INT set = -1;
5861 set = new_alias_set ();
5865 /* Return a legitimate reference for ORIG (an address) using the
5866 register REG. If REG is 0, a new pseudo is generated.
5868 There are two types of references that must be handled:
5870 1. Global data references must load the address from the GOT, via
5871 the PIC reg. An insn is emitted to do this load, and the reg is
5874 2. Static data references, constant pool addresses, and code labels
5875 compute the address as an offset from the GOT, whose base is in
5876 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
5877 differentiate them from global data objects. The returned
5878 address is the PIC reg + an unspec constant.
5880 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5881 reg also appears in the address. */
5884 legitimize_pic_address (orig, reg)
5894 reg = gen_reg_rtx (Pmode);
5895 /* Use the generic Mach-O PIC machinery. */
5896 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5899 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
5901 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
5903 /* This symbol may be referenced via a displacement from the PIC
5904 base address (@GOTOFF). */
5906 if (reload_in_progress)
5907 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5908 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5909 new = gen_rtx_CONST (Pmode, new);
5910 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5914 emit_move_insn (reg, new);
5918 else if (GET_CODE (addr) == SYMBOL_REF)
5922 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5923 new = gen_rtx_CONST (Pmode, new);
5924 new = gen_rtx_MEM (Pmode, new);
5925 RTX_UNCHANGING_P (new) = 1;
5926 set_mem_alias_set (new, ix86_GOT_alias_set ());
5929 reg = gen_reg_rtx (Pmode);
5930 /* Use directly gen_movsi, otherwise the address is loaded
5931 into register for CSE. We don't want to CSE this addresses,
5932 instead we CSE addresses from the GOT table, so skip this. */
5933 emit_insn (gen_movsi (reg, new));
5938 /* This symbol must be referenced via a load from the
5939 Global Offset Table (@GOT). */
5941 if (reload_in_progress)
5942 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5943 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5944 new = gen_rtx_CONST (Pmode, new);
5945 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5946 new = gen_rtx_MEM (Pmode, new);
5947 RTX_UNCHANGING_P (new) = 1;
5948 set_mem_alias_set (new, ix86_GOT_alias_set ());
5951 reg = gen_reg_rtx (Pmode);
5952 emit_move_insn (reg, new);
5958 if (GET_CODE (addr) == CONST)
5960 addr = XEXP (addr, 0);
5962 /* We must match stuff we generate before. Assume the only
5963 unspecs that can get here are ours. Not that we could do
5964 anything with them anyway... */
5965 if (GET_CODE (addr) == UNSPEC
5966 || (GET_CODE (addr) == PLUS
5967 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5969 if (GET_CODE (addr) != PLUS)
5972 if (GET_CODE (addr) == PLUS)
5974 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
5976 /* Check first to see if this is a constant offset from a @GOTOFF
5977 symbol reference. */
5978 if (local_symbolic_operand (op0, Pmode)
5979 && GET_CODE (op1) == CONST_INT)
5983 if (reload_in_progress)
5984 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5985 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5987 new = gen_rtx_PLUS (Pmode, new, op1);
5988 new = gen_rtx_CONST (Pmode, new);
5989 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5993 emit_move_insn (reg, new);
5999 if (INTVAL (op1) < -16*1024*1024
6000 || INTVAL (op1) >= 16*1024*1024)
6001 new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
6006 base = legitimize_pic_address (XEXP (addr, 0), reg);
6007 new = legitimize_pic_address (XEXP (addr, 1),
6008 base == reg ? NULL_RTX : reg);
6010 if (GET_CODE (new) == CONST_INT)
6011 new = plus_constant (base, INTVAL (new));
6014 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6016 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6017 new = XEXP (new, 1);
6019 new = gen_rtx_PLUS (Pmode, base, new);
6028 ix86_encode_section_info (decl, first)
6030 int first ATTRIBUTE_UNUSED;
6032 bool local_p = (*targetm.binds_local_p) (decl);
6035 rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
6036 if (GET_CODE (rtl) != MEM)
6038 symbol = XEXP (rtl, 0);
6039 if (GET_CODE (symbol) != SYMBOL_REF)
6042 /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
6043 symbol so that we may access it directly in the GOT. */
6046 SYMBOL_REF_FLAG (symbol) = local_p;
6048 /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
6049 "local dynamic", "initial exec" or "local exec" TLS models
6052 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
6054 const char *symbol_str;
6057 enum tls_model kind = decl_tls_model (decl);
6059 if (TARGET_64BIT && ! flag_pic)
6061 /* x86-64 doesn't allow non-pic code for shared libraries,
6062 so don't generate GD/LD TLS models for non-pic code. */
6065 case TLS_MODEL_GLOBAL_DYNAMIC:
6066 kind = TLS_MODEL_INITIAL_EXEC; break;
6067 case TLS_MODEL_LOCAL_DYNAMIC:
6068 kind = TLS_MODEL_LOCAL_EXEC; break;
6074 symbol_str = XSTR (symbol, 0);
6076 if (symbol_str[0] == '%')
6078 if (symbol_str[1] == tls_model_chars[kind])
6082 len = strlen (symbol_str) + 1;
6083 newstr = alloca (len + 2);
6086 newstr[1] = tls_model_chars[kind];
6087 memcpy (newstr + 2, symbol_str, len);
6089 XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
6093 /* Undo the above when printing symbol names. */
6096 ix86_strip_name_encoding (str)
6106 /* Load the thread pointer into a register. */
6109 get_thread_pointer ()
6113 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6114 tp = gen_rtx_MEM (Pmode, tp);
6115 RTX_UNCHANGING_P (tp) = 1;
6116 set_mem_alias_set (tp, ix86_GOT_alias_set ());
6117 tp = force_reg (Pmode, tp);
6122 /* Try machine-dependent ways of modifying an illegitimate address
6123 to be legitimate. If we find one, return the new, valid address.
6124 This macro is used in only one place: `memory_address' in explow.c.
6126 OLDX is the address as it was before break_out_memory_refs was called.
6127 In some cases it is useful to look at this to decide what needs to be done.
6129 MODE and WIN are passed so that this macro can use
6130 GO_IF_LEGITIMATE_ADDRESS.
6132 It is always safe for this macro to do nothing. It exists to recognize
6133 opportunities to optimize the output.
6135 For the 80386, we handle X+REG by loading X into a register R and
6136 using R+REG. R will go in a general reg and indexing will be used.
6137 However, if REG is a broken-out memory address or multiplication,
6138 nothing needs to be done because REG can certainly go in a general reg.
6140 When -fpic is used, special handling is needed for symbolic references.
6141 See comments by legitimize_pic_address in i386.c for details. */
6144 legitimize_address (x, oldx, mode)
6146 register rtx oldx ATTRIBUTE_UNUSED;
6147 enum machine_mode mode;
6152 if (TARGET_DEBUG_ADDR)
6154 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6155 GET_MODE_NAME (mode));
6159 log = tls_symbolic_operand (x, mode);
6162 rtx dest, base, off, pic;
6167 case TLS_MODEL_GLOBAL_DYNAMIC:
6168 dest = gen_reg_rtx (Pmode);
6171 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6174 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6175 insns = get_insns ();
6178 emit_libcall_block (insns, dest, rax, x);
6181 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6184 case TLS_MODEL_LOCAL_DYNAMIC:
6185 base = gen_reg_rtx (Pmode);
6188 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6191 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6192 insns = get_insns ();
6195 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6196 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6197 emit_libcall_block (insns, base, rax, note);
6200 emit_insn (gen_tls_local_dynamic_base_32 (base));
6202 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6203 off = gen_rtx_CONST (Pmode, off);
6205 return gen_rtx_PLUS (Pmode, base, off);
6207 case TLS_MODEL_INITIAL_EXEC:
6211 type = UNSPEC_GOTNTPOFF;
6215 if (reload_in_progress)
6216 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6217 pic = pic_offset_table_rtx;
6218 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6220 else if (!TARGET_GNU_TLS)
6222 pic = gen_reg_rtx (Pmode);
6223 emit_insn (gen_set_got (pic));
6224 type = UNSPEC_GOTTPOFF;
6229 type = UNSPEC_INDNTPOFF;
6232 base = get_thread_pointer ();
6234 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6235 off = gen_rtx_CONST (Pmode, off);
6237 off = gen_rtx_PLUS (Pmode, pic, off);
6238 off = gen_rtx_MEM (Pmode, off);
6239 RTX_UNCHANGING_P (off) = 1;
6240 set_mem_alias_set (off, ix86_GOT_alias_set ());
6241 dest = gen_reg_rtx (Pmode);
6243 if (TARGET_64BIT || TARGET_GNU_TLS)
6245 emit_move_insn (dest, off);
6246 return gen_rtx_PLUS (Pmode, base, dest);
6249 emit_insn (gen_subsi3 (dest, base, off));
6252 case TLS_MODEL_LOCAL_EXEC:
6253 base = get_thread_pointer ();
6255 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6256 (TARGET_64BIT || TARGET_GNU_TLS)
6257 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6258 off = gen_rtx_CONST (Pmode, off);
6260 if (TARGET_64BIT || TARGET_GNU_TLS)
6261 return gen_rtx_PLUS (Pmode, base, off);
6264 dest = gen_reg_rtx (Pmode);
6265 emit_insn (gen_subsi3 (dest, base, off));
6276 if (flag_pic && SYMBOLIC_CONST (x))
6277 return legitimize_pic_address (x, 0);
6279 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6280 if (GET_CODE (x) == ASHIFT
6281 && GET_CODE (XEXP (x, 1)) == CONST_INT
6282 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6285 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6286 GEN_INT (1 << log));
6289 if (GET_CODE (x) == PLUS)
6291 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6293 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6294 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6295 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6298 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6299 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6300 GEN_INT (1 << log));
6303 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6304 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6305 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6308 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6309 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6310 GEN_INT (1 << log));
6313 /* Put multiply first if it isn't already. */
6314 if (GET_CODE (XEXP (x, 1)) == MULT)
6316 rtx tmp = XEXP (x, 0);
6317 XEXP (x, 0) = XEXP (x, 1);
6322 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6323 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6324 created by virtual register instantiation, register elimination, and
6325 similar optimizations. */
6326 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6329 x = gen_rtx_PLUS (Pmode,
6330 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6331 XEXP (XEXP (x, 1), 0)),
6332 XEXP (XEXP (x, 1), 1));
6336 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6337 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6338 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6339 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6340 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6341 && CONSTANT_P (XEXP (x, 1)))
6344 rtx other = NULL_RTX;
6346 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6348 constant = XEXP (x, 1);
6349 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6351 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6353 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6354 other = XEXP (x, 1);
6362 x = gen_rtx_PLUS (Pmode,
6363 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6364 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6365 plus_constant (other, INTVAL (constant)));
6369 if (changed && legitimate_address_p (mode, x, FALSE))
6372 if (GET_CODE (XEXP (x, 0)) == MULT)
6375 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6378 if (GET_CODE (XEXP (x, 1)) == MULT)
6381 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6385 && GET_CODE (XEXP (x, 1)) == REG
6386 && GET_CODE (XEXP (x, 0)) == REG)
6389 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6392 x = legitimize_pic_address (x, 0);
6395 if (changed && legitimate_address_p (mode, x, FALSE))
6398 if (GET_CODE (XEXP (x, 0)) == REG)
6400 register rtx temp = gen_reg_rtx (Pmode);
6401 register rtx val = force_operand (XEXP (x, 1), temp);
6403 emit_move_insn (temp, val);
6409 else if (GET_CODE (XEXP (x, 1)) == REG)
6411 register rtx temp = gen_reg_rtx (Pmode);
6412 register rtx val = force_operand (XEXP (x, 0), temp);
6414 emit_move_insn (temp, val);
6424 /* Print an integer constant expression in assembler syntax. Addition
6425 and subtraction are the only arithmetic that may appear in these
6426 expressions. FILE is the stdio stream to write to, X is the rtx, and
6427 CODE is the operand print code from the output string. */
6430 output_pic_addr_const (file, x, code)
6437 switch (GET_CODE (x))
6447 assemble_name (file, XSTR (x, 0));
6448 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_FLAG (x))
6449 fputs ("@PLT", file);
6456 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6457 assemble_name (asm_out_file, buf);
6461 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6465 /* This used to output parentheses around the expression,
6466 but that does not work on the 386 (either ATT or BSD assembler). */
6467 output_pic_addr_const (file, XEXP (x, 0), code);
6471 if (GET_MODE (x) == VOIDmode)
6473 /* We can use %d if the number is <32 bits and positive. */
6474 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6475 fprintf (file, "0x%lx%08lx",
6476 (unsigned long) CONST_DOUBLE_HIGH (x),
6477 (unsigned long) CONST_DOUBLE_LOW (x));
6479 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6482 /* We can't handle floating point constants;
6483 PRINT_OPERAND must handle them. */
6484 output_operand_lossage ("floating constant misused");
6488 /* Some assemblers need integer constants to appear first. */
6489 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6491 output_pic_addr_const (file, XEXP (x, 0), code);
6493 output_pic_addr_const (file, XEXP (x, 1), code);
6495 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6497 output_pic_addr_const (file, XEXP (x, 1), code);
6499 output_pic_addr_const (file, XEXP (x, 0), code);
6507 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6508 output_pic_addr_const (file, XEXP (x, 0), code);
6510 output_pic_addr_const (file, XEXP (x, 1), code);
6512 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6516 if (XVECLEN (x, 0) != 1)
6518 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6519 switch (XINT (x, 1))
6522 fputs ("@GOT", file);
6525 fputs ("@GOTOFF", file);
6527 case UNSPEC_GOTPCREL:
6528 fputs ("@GOTPCREL(%rip)", file);
6530 case UNSPEC_GOTTPOFF:
6531 /* FIXME: This might be @TPOFF in Sun ld too. */
6532 fputs ("@GOTTPOFF", file);
6535 fputs ("@TPOFF", file);
6539 fputs ("@TPOFF", file);
6541 fputs ("@NTPOFF", file);
6544 fputs ("@DTPOFF", file);
6546 case UNSPEC_GOTNTPOFF:
6548 fputs ("@GOTTPOFF(%rip)", file);
6550 fputs ("@GOTNTPOFF", file);
6552 case UNSPEC_INDNTPOFF:
6553 fputs ("@INDNTPOFF", file);
6556 output_operand_lossage ("invalid UNSPEC as operand");
6562 output_operand_lossage ("invalid expression as operand");
6566 /* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6567 We need to handle our special PIC relocations. */
6570 i386_dwarf_output_addr_const (file, x)
6575 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6579 fprintf (file, "%s", ASM_LONG);
6582 output_pic_addr_const (file, x, '\0');
6584 output_addr_const (file, x);
6588 /* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6589 We need to emit DTP-relative relocations. */
6592 i386_output_dwarf_dtprel (file, size, x)
6597 fputs (ASM_LONG, file);
6598 output_addr_const (file, x);
6599 fputs ("@DTPOFF", file);
6605 fputs (", 0", file);
6612 /* In the name of slightly smaller debug output, and to cater to
6613 general assembler losage, recognize PIC+GOTOFF and turn it back
6614 into a direct symbol reference. */
6617 i386_simplify_dwarf_addr (orig_x)
6622 if (GET_CODE (x) == MEM)
6627 if (GET_CODE (x) != CONST
6628 || GET_CODE (XEXP (x, 0)) != UNSPEC
6629 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6630 || GET_CODE (orig_x) != MEM)
6632 return XVECEXP (XEXP (x, 0), 0, 0);
6635 if (GET_CODE (x) != PLUS
6636 || GET_CODE (XEXP (x, 1)) != CONST)
6639 if (GET_CODE (XEXP (x, 0)) == REG
6640 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6641 /* %ebx + GOT/GOTOFF */
6643 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6645 /* %ebx + %reg * scale + GOT/GOTOFF */
6647 if (GET_CODE (XEXP (y, 0)) == REG
6648 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6650 else if (GET_CODE (XEXP (y, 1)) == REG
6651 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6655 if (GET_CODE (y) != REG
6656 && GET_CODE (y) != MULT
6657 && GET_CODE (y) != ASHIFT)
6663 x = XEXP (XEXP (x, 1), 0);
6664 if (GET_CODE (x) == UNSPEC
6665 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6666 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6669 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6670 return XVECEXP (x, 0, 0);
6673 if (GET_CODE (x) == PLUS
6674 && GET_CODE (XEXP (x, 0)) == UNSPEC
6675 && GET_CODE (XEXP (x, 1)) == CONST_INT
6676 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6677 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6678 && GET_CODE (orig_x) != MEM)))
6680 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6682 return gen_rtx_PLUS (Pmode, y, x);
6690 put_condition_code (code, mode, reverse, fp, file)
6692 enum machine_mode mode;
6698 if (mode == CCFPmode || mode == CCFPUmode)
6700 enum rtx_code second_code, bypass_code;
6701 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6702 if (bypass_code != NIL || second_code != NIL)
6704 code = ix86_fp_compare_code_to_integer (code);
6708 code = reverse_condition (code);
6719 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6724 /* ??? Use "nbe" instead of "a" for fcmov losage on some assemblers.
6725 Those same assemblers have the same but opposite losage on cmov. */
6728 suffix = fp ? "nbe" : "a";
6731 if (mode == CCNOmode || mode == CCGOCmode)
6733 else if (mode == CCmode || mode == CCGCmode)
6744 if (mode == CCNOmode || mode == CCGOCmode)
6746 else if (mode == CCmode || mode == CCGCmode)
6755 suffix = fp ? "nb" : "ae";
6758 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6768 suffix = fp ? "u" : "p";
6771 suffix = fp ? "nu" : "np";
6776 fputs (suffix, file);
6780 print_reg (x, code, file)
6785 if (REGNO (x) == ARG_POINTER_REGNUM
6786 || REGNO (x) == FRAME_POINTER_REGNUM
6787 || REGNO (x) == FLAGS_REG
6788 || REGNO (x) == FPSR_REG)
6791 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6794 if (code == 'w' || MMX_REG_P (x))
6796 else if (code == 'b')
6798 else if (code == 'k')
6800 else if (code == 'q')
6802 else if (code == 'y')
6804 else if (code == 'h')
6807 code = GET_MODE_SIZE (GET_MODE (x));
6809 /* Irritatingly, AMD extended registers use different naming convention
6810 from the normal registers. */
6811 if (REX_INT_REG_P (x))
6818 error ("extended registers have no high halves");
6821 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6824 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6827 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6830 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6833 error ("unsupported operand size for extended register");
6841 if (STACK_TOP_P (x))
6843 fputs ("st(0)", file);
6850 if (! ANY_FP_REG_P (x))
6851 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6855 fputs (hi_reg_name[REGNO (x)], file);
6858 fputs (qi_reg_name[REGNO (x)], file);
6861 fputs (qi_high_reg_name[REGNO (x)], file);
6868 /* Locate some local-dynamic symbol still in use by this function
6869 so that we can print its name in some tls_local_dynamic_base
6873 get_some_local_dynamic_name ()
6877 if (cfun->machine->some_ld_name)
6878 return cfun->machine->some_ld_name;
6880 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6882 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6883 return cfun->machine->some_ld_name;
6889 get_some_local_dynamic_name_1 (px, data)
6891 void *data ATTRIBUTE_UNUSED;
6895 if (GET_CODE (x) == SYMBOL_REF
6896 && local_dynamic_symbolic_operand (x, Pmode))
6898 cfun->machine->some_ld_name = XSTR (x, 0);
6906 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6907 C -- print opcode suffix for set/cmov insn.
6908 c -- like C, but print reversed condition
6909 F,f -- likewise, but for floating-point.
6910 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
6912 R -- print the prefix for register names.
6913 z -- print the opcode suffix for the size of the current operand.
6914 * -- print a star (in certain assembler syntax)
6915 A -- print an absolute memory reference.
6916 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6917 s -- print a shift double count, followed by the assemblers argument
6919 b -- print the QImode name of the register for the indicated operand.
6920 %b0 would print %al if operands[0] is reg 0.
6921 w -- likewise, print the HImode name of the register.
6922 k -- likewise, print the SImode name of the register.
6923 q -- likewise, print the DImode name of the register.
6924 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6925 y -- print "st(0)" instead of "st" as a register.
6926 D -- print condition for SSE cmp instruction.
6927 P -- if PIC, print an @PLT suffix.
6928 X -- don't print any sort of PIC '@' suffix for a symbol.
6929 & -- print some in-use local-dynamic symbol name.
6933 print_operand (file, x, code)
6943 if (ASSEMBLER_DIALECT == ASM_ATT)
6948 assemble_name (file, get_some_local_dynamic_name ());
6952 if (ASSEMBLER_DIALECT == ASM_ATT)
6954 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6956 /* Intel syntax. For absolute addresses, registers should not
6957 be surrounded by braces. */
6958 if (GET_CODE (x) != REG)
6961 PRINT_OPERAND (file, x, 0);
6969 PRINT_OPERAND (file, x, 0);
6974 if (ASSEMBLER_DIALECT == ASM_ATT)
6979 if (ASSEMBLER_DIALECT == ASM_ATT)
6984 if (ASSEMBLER_DIALECT == ASM_ATT)
6989 if (ASSEMBLER_DIALECT == ASM_ATT)
6994 if (ASSEMBLER_DIALECT == ASM_ATT)
6999 if (ASSEMBLER_DIALECT == ASM_ATT)
7004 /* 387 opcodes don't get size suffixes if the operands are
7006 if (STACK_REG_P (x))
7009 /* Likewise if using Intel opcodes. */
7010 if (ASSEMBLER_DIALECT == ASM_INTEL)
7013 /* This is the size of op from size of operand. */
7014 switch (GET_MODE_SIZE (GET_MODE (x)))
7017 #ifdef HAVE_GAS_FILDS_FISTS
7023 if (GET_MODE (x) == SFmode)
7038 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7040 #ifdef GAS_MNEMONICS
7066 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7068 PRINT_OPERAND (file, x, 0);
7074 /* Little bit of braindamage here. The SSE compare instructions
7075 does use completely different names for the comparisons that the
7076 fp conditional moves. */
7077 switch (GET_CODE (x))
7092 fputs ("unord", file);
7096 fputs ("neq", file);
7100 fputs ("nlt", file);
7104 fputs ("nle", file);
7107 fputs ("ord", file);
7115 #ifdef CMOV_SUN_AS_SYNTAX
7116 if (ASSEMBLER_DIALECT == ASM_ATT)
7118 switch (GET_MODE (x))
7120 case HImode: putc ('w', file); break;
7122 case SFmode: putc ('l', file); break;
7124 case DFmode: putc ('q', file); break;
7132 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7135 #ifdef CMOV_SUN_AS_SYNTAX
7136 if (ASSEMBLER_DIALECT == ASM_ATT)
7139 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7142 /* Like above, but reverse condition */
7144 /* Check to see if argument to %c is really a constant
7145 and not a condition code which needs to be reversed. */
7146 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
7148 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7151 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7154 #ifdef CMOV_SUN_AS_SYNTAX
7155 if (ASSEMBLER_DIALECT == ASM_ATT)
7158 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7164 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7167 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7170 int pred_val = INTVAL (XEXP (x, 0));
7172 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7173 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7175 int taken = pred_val > REG_BR_PROB_BASE / 2;
7176 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7178 /* Emit hints only in the case default branch prediction
7179 heuristics would fail. */
7180 if (taken != cputaken)
7182 /* We use 3e (DS) prefix for taken branches and
7183 2e (CS) prefix for not taken branches. */
7185 fputs ("ds ; ", file);
7187 fputs ("cs ; ", file);
7194 output_operand_lossage ("invalid operand code `%c'", code);
7198 if (GET_CODE (x) == REG)
7200 PRINT_REG (x, code, file);
7203 else if (GET_CODE (x) == MEM)
7205 /* No `byte ptr' prefix for call instructions. */
7206 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7209 switch (GET_MODE_SIZE (GET_MODE (x)))
7211 case 1: size = "BYTE"; break;
7212 case 2: size = "WORD"; break;
7213 case 4: size = "DWORD"; break;
7214 case 8: size = "QWORD"; break;
7215 case 12: size = "XWORD"; break;
7216 case 16: size = "XMMWORD"; break;
7221 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7224 else if (code == 'w')
7226 else if (code == 'k')
7230 fputs (" PTR ", file);
7234 if (flag_pic && CONSTANT_ADDRESS_P (x))
7235 output_pic_addr_const (file, x, code);
7236 /* Avoid (%rip) for call operands. */
7237 else if (CONSTANT_ADDRESS_P (x) && code == 'P'
7238 && GET_CODE (x) != CONST_INT)
7239 output_addr_const (file, x);
7240 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7241 output_operand_lossage ("invalid constraints for operand");
7246 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7251 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7252 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7254 if (ASSEMBLER_DIALECT == ASM_ATT)
7256 fprintf (file, "0x%lx", l);
7259 /* These float cases don't actually occur as immediate operands. */
7260 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7264 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7265 fprintf (file, "%s", dstr);
7268 else if (GET_CODE (x) == CONST_DOUBLE
7269 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
7273 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7274 fprintf (file, "%s", dstr);
7281 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7283 if (ASSEMBLER_DIALECT == ASM_ATT)
7286 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7287 || GET_CODE (x) == LABEL_REF)
7289 if (ASSEMBLER_DIALECT == ASM_ATT)
7292 fputs ("OFFSET FLAT:", file);
7295 if (GET_CODE (x) == CONST_INT)
7296 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7298 output_pic_addr_const (file, x, code);
7300 output_addr_const (file, x);
7304 /* Print a memory operand whose address is ADDR. */
7307 print_operand_address (file, addr)
7311 struct ix86_address parts;
7312 rtx base, index, disp;
7315 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
7317 if (ASSEMBLER_DIALECT == ASM_INTEL)
7318 fputs ("DWORD PTR ", file);
7319 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7322 fputs ("fs:0", file);
7324 fputs ("gs:0", file);
7328 if (! ix86_decompose_address (addr, &parts))
7332 index = parts.index;
7334 scale = parts.scale;
7336 if (!base && !index)
7338 /* Displacement only requires special attention. */
7340 if (GET_CODE (disp) == CONST_INT)
7342 if (ASSEMBLER_DIALECT == ASM_INTEL)
7344 if (USER_LABEL_PREFIX[0] == 0)
7346 fputs ("ds:", file);
7348 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
7351 output_pic_addr_const (file, addr, 0);
7353 output_addr_const (file, addr);
7355 /* Use one byte shorter RIP relative addressing for 64bit mode. */
7357 && ((GET_CODE (addr) == SYMBOL_REF
7358 && ! tls_symbolic_operand (addr, GET_MODE (addr)))
7359 || GET_CODE (addr) == LABEL_REF
7360 || (GET_CODE (addr) == CONST
7361 && GET_CODE (XEXP (addr, 0)) == PLUS
7362 && (GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
7363 || GET_CODE (XEXP (XEXP (addr, 0), 0)) == LABEL_REF)
7364 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)))
7365 fputs ("(%rip)", file);
7369 if (ASSEMBLER_DIALECT == ASM_ATT)
7374 output_pic_addr_const (file, disp, 0);
7375 else if (GET_CODE (disp) == LABEL_REF)
7376 output_asm_label (disp);
7378 output_addr_const (file, disp);
7383 PRINT_REG (base, 0, file);
7387 PRINT_REG (index, 0, file);
7389 fprintf (file, ",%d", scale);
7395 rtx offset = NULL_RTX;
7399 /* Pull out the offset of a symbol; print any symbol itself. */
7400 if (GET_CODE (disp) == CONST
7401 && GET_CODE (XEXP (disp, 0)) == PLUS
7402 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7404 offset = XEXP (XEXP (disp, 0), 1);
7405 disp = gen_rtx_CONST (VOIDmode,
7406 XEXP (XEXP (disp, 0), 0));
7410 output_pic_addr_const (file, disp, 0);
7411 else if (GET_CODE (disp) == LABEL_REF)
7412 output_asm_label (disp);
7413 else if (GET_CODE (disp) == CONST_INT)
7416 output_addr_const (file, disp);
7422 PRINT_REG (base, 0, file);
7425 if (INTVAL (offset) >= 0)
7427 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7431 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7438 PRINT_REG (index, 0, file);
7440 fprintf (file, "*%d", scale);
7448 output_addr_const_extra (file, x)
7454 if (GET_CODE (x) != UNSPEC)
7457 op = XVECEXP (x, 0, 0);
7458 switch (XINT (x, 1))
7460 case UNSPEC_GOTTPOFF:
7461 output_addr_const (file, op);
7462 /* FIXME: This might be @TPOFF in Sun ld. */
7463 fputs ("@GOTTPOFF", file);
7466 output_addr_const (file, op);
7467 fputs ("@TPOFF", file);
7470 output_addr_const (file, op);
7472 fputs ("@TPOFF", file);
7474 fputs ("@NTPOFF", file);
7477 output_addr_const (file, op);
7478 fputs ("@DTPOFF", file);
7480 case UNSPEC_GOTNTPOFF:
7481 output_addr_const (file, op);
7483 fputs ("@GOTTPOFF(%rip)", file);
7485 fputs ("@GOTNTPOFF", file);
7487 case UNSPEC_INDNTPOFF:
7488 output_addr_const (file, op);
7489 fputs ("@INDNTPOFF", file);
7499 /* Split one or more DImode RTL references into pairs of SImode
7500 references. The RTL can be REG, offsettable MEM, integer constant, or
7501 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7502 split and "num" is its length. lo_half and hi_half are output arrays
7503 that parallel "operands". */
7506 split_di (operands, num, lo_half, hi_half)
7509 rtx lo_half[], hi_half[];
7513 rtx op = operands[num];
7515 /* simplify_subreg refuse to split volatile memory addresses,
7516 but we still have to handle it. */
7517 if (GET_CODE (op) == MEM)
7519 lo_half[num] = adjust_address (op, SImode, 0);
7520 hi_half[num] = adjust_address (op, SImode, 4);
7524 lo_half[num] = simplify_gen_subreg (SImode, op,
7525 GET_MODE (op) == VOIDmode
7526 ? DImode : GET_MODE (op), 0);
7527 hi_half[num] = simplify_gen_subreg (SImode, op,
7528 GET_MODE (op) == VOIDmode
7529 ? DImode : GET_MODE (op), 4);
7533 /* Split one or more TImode RTL references into pairs of SImode
7534 references. The RTL can be REG, offsettable MEM, integer constant, or
7535 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7536 split and "num" is its length. lo_half and hi_half are output arrays
7537 that parallel "operands". */
7540 split_ti (operands, num, lo_half, hi_half)
7543 rtx lo_half[], hi_half[];
7547 rtx op = operands[num];
7549 /* simplify_subreg refuse to split volatile memory addresses, but we
7550 still have to handle it. */
7551 if (GET_CODE (op) == MEM)
7553 lo_half[num] = adjust_address (op, DImode, 0);
7554 hi_half[num] = adjust_address (op, DImode, 8);
7558 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7559 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7564 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7565 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7566 is the expression of the binary operation. The output may either be
7567 emitted here, or returned to the caller, like all output_* functions.
7569 There is no guarantee that the operands are the same mode, as they
7570 might be within FLOAT or FLOAT_EXTEND expressions. */
7572 #ifndef SYSV386_COMPAT
7573 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7574 wants to fix the assemblers because that causes incompatibility
7575 with gcc. No-one wants to fix gcc because that causes
7576 incompatibility with assemblers... You can use the option of
7577 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7578 #define SYSV386_COMPAT 1
7582 output_387_binary_op (insn, operands)
7586 static char buf[30];
7589 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7591 #ifdef ENABLE_CHECKING
7592 /* Even if we do not want to check the inputs, this documents input
7593 constraints. Which helps in understanding the following code. */
7594 if (STACK_REG_P (operands[0])
7595 && ((REG_P (operands[1])
7596 && REGNO (operands[0]) == REGNO (operands[1])
7597 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7598 || (REG_P (operands[2])
7599 && REGNO (operands[0]) == REGNO (operands[2])
7600 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7601 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7607 switch (GET_CODE (operands[3]))
7610 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7611 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7619 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7620 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7628 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7629 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7637 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7638 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7652 if (GET_MODE (operands[0]) == SFmode)
7653 strcat (buf, "ss\t{%2, %0|%0, %2}");
7655 strcat (buf, "sd\t{%2, %0|%0, %2}");
7660 switch (GET_CODE (operands[3]))
7664 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7666 rtx temp = operands[2];
7667 operands[2] = operands[1];
7671 /* know operands[0] == operands[1]. */
7673 if (GET_CODE (operands[2]) == MEM)
7679 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7681 if (STACK_TOP_P (operands[0]))
7682 /* How is it that we are storing to a dead operand[2]?
7683 Well, presumably operands[1] is dead too. We can't
7684 store the result to st(0) as st(0) gets popped on this
7685 instruction. Instead store to operands[2] (which I
7686 think has to be st(1)). st(1) will be popped later.
7687 gcc <= 2.8.1 didn't have this check and generated
7688 assembly code that the Unixware assembler rejected. */
7689 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7691 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7695 if (STACK_TOP_P (operands[0]))
7696 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7698 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7703 if (GET_CODE (operands[1]) == MEM)
7709 if (GET_CODE (operands[2]) == MEM)
7715 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7718 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7719 derived assemblers, confusingly reverse the direction of
7720 the operation for fsub{r} and fdiv{r} when the
7721 destination register is not st(0). The Intel assembler
7722 doesn't have this brain damage. Read !SYSV386_COMPAT to
7723 figure out what the hardware really does. */
7724 if (STACK_TOP_P (operands[0]))
7725 p = "{p\t%0, %2|rp\t%2, %0}";
7727 p = "{rp\t%2, %0|p\t%0, %2}";
7729 if (STACK_TOP_P (operands[0]))
7730 /* As above for fmul/fadd, we can't store to st(0). */
7731 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7733 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7738 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7741 if (STACK_TOP_P (operands[0]))
7742 p = "{rp\t%0, %1|p\t%1, %0}";
7744 p = "{p\t%1, %0|rp\t%0, %1}";
7746 if (STACK_TOP_P (operands[0]))
7747 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7749 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7754 if (STACK_TOP_P (operands[0]))
7756 if (STACK_TOP_P (operands[1]))
7757 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7759 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7762 else if (STACK_TOP_P (operands[1]))
7765 p = "{\t%1, %0|r\t%0, %1}";
7767 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7773 p = "{r\t%2, %0|\t%0, %2}";
7775 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7788 /* Output code to initialize control word copies used by
7789 trunc?f?i patterns. NORMAL is set to current control word, while ROUND_DOWN
7790 is set to control word rounding downwards. */
7792 emit_i387_cw_initialization (normal, round_down)
7793 rtx normal, round_down;
7795 rtx reg = gen_reg_rtx (HImode);
7797 emit_insn (gen_x86_fnstcw_1 (normal));
7798 emit_move_insn (reg, normal);
7799 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7801 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7803 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7804 emit_move_insn (round_down, reg);
7807 /* Output code for INSN to convert a float to a signed int. OPERANDS
7808 are the insn operands. The output may be [HSD]Imode and the input
7809 operand may be [SDX]Fmode. */
7812 output_fix_trunc (insn, operands)
7816 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7817 int dimode_p = GET_MODE (operands[0]) == DImode;
7819 /* Jump through a hoop or two for DImode, since the hardware has no
7820 non-popping instruction. We used to do this a different way, but
7821 that was somewhat fragile and broke with post-reload splitters. */
7822 if (dimode_p && !stack_top_dies)
7823 output_asm_insn ("fld\t%y1", operands);
7825 if (!STACK_TOP_P (operands[1]))
7828 if (GET_CODE (operands[0]) != MEM)
7831 output_asm_insn ("fldcw\t%3", operands);
7832 if (stack_top_dies || dimode_p)
7833 output_asm_insn ("fistp%z0\t%0", operands);
7835 output_asm_insn ("fist%z0\t%0", operands);
7836 output_asm_insn ("fldcw\t%2", operands);
7841 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7842 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7843 when fucom should be used. */
7846 output_fp_compare (insn, operands, eflags_p, unordered_p)
7849 int eflags_p, unordered_p;
7852 rtx cmp_op0 = operands[0];
7853 rtx cmp_op1 = operands[1];
7854 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
7859 cmp_op1 = operands[2];
7863 if (GET_MODE (operands[0]) == SFmode)
7865 return "ucomiss\t{%1, %0|%0, %1}";
7867 return "comiss\t{%1, %0|%0, %1}";
7870 return "ucomisd\t{%1, %0|%0, %1}";
7872 return "comisd\t{%1, %0|%0, %1}";
7875 if (! STACK_TOP_P (cmp_op0))
7878 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
7880 if (STACK_REG_P (cmp_op1)
7882 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7883 && REGNO (cmp_op1) != FIRST_STACK_REG)
7885 /* If both the top of the 387 stack dies, and the other operand
7886 is also a stack register that dies, then this must be a
7887 `fcompp' float compare */
7891 /* There is no double popping fcomi variant. Fortunately,
7892 eflags is immune from the fstp's cc clobbering. */
7894 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7896 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7904 return "fucompp\n\tfnstsw\t%0";
7906 return "fcompp\n\tfnstsw\t%0";
7919 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
7921 static const char * const alt[24] =
7933 "fcomi\t{%y1, %0|%0, %y1}",
7934 "fcomip\t{%y1, %0|%0, %y1}",
7935 "fucomi\t{%y1, %0|%0, %y1}",
7936 "fucomip\t{%y1, %0|%0, %y1}",
7943 "fcom%z2\t%y2\n\tfnstsw\t%0",
7944 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7945 "fucom%z2\t%y2\n\tfnstsw\t%0",
7946 "fucomp%z2\t%y2\n\tfnstsw\t%0",
7948 "ficom%z2\t%y2\n\tfnstsw\t%0",
7949 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7957 mask = eflags_p << 3;
7958 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7959 mask |= unordered_p << 1;
7960 mask |= stack_top_dies;
7973 ix86_output_addr_vec_elt (file, value)
7977 const char *directive = ASM_LONG;
7982 directive = ASM_QUAD;
7988 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7992 ix86_output_addr_diff_elt (file, value, rel)
7997 fprintf (file, "%s%s%d-%s%d\n",
7998 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7999 else if (HAVE_AS_GOTOFF_IN_DATA)
8000 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8002 else if (TARGET_MACHO)
8003 fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
8004 machopic_function_base_name () + 1);
8007 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8008 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8011 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8015 ix86_expand_clear (dest)
8020 /* We play register width games, which are only valid after reload. */
8021 if (!reload_completed)
8024 /* Avoid HImode and its attendant prefix byte. */
8025 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8026 dest = gen_rtx_REG (SImode, REGNO (dest));
8028 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8030 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8031 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8033 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8034 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8040 /* X is an unchanging MEM. If it is a constant pool reference, return
8041 the constant pool rtx, else NULL. */
8044 maybe_get_pool_constant (x)
8049 if (flag_pic && ! TARGET_64BIT)
8051 if (GET_CODE (x) != PLUS)
8053 if (XEXP (x, 0) != pic_offset_table_rtx)
8056 if (GET_CODE (x) != CONST)
8059 if (GET_CODE (x) != UNSPEC)
8061 if (XINT (x, 1) != UNSPEC_GOTOFF)
8063 x = XVECEXP (x, 0, 0);
8066 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8067 return get_pool_constant (x);
8073 ix86_expand_move (mode, operands)
8074 enum machine_mode mode;
8077 int strict = (reload_in_progress || reload_completed);
8078 rtx insn, op0, op1, tmp;
8083 if (tls_symbolic_operand (op1, Pmode))
8085 op1 = legitimize_address (op1, op1, VOIDmode);
8086 if (GET_CODE (op0) == MEM)
8088 tmp = gen_reg_rtx (mode);
8089 emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
8093 else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8098 rtx temp = ((reload_in_progress
8099 || ((op0 && GET_CODE (op0) == REG)
8101 ? op0 : gen_reg_rtx (Pmode));
8102 op1 = machopic_indirect_data_reference (op1, temp);
8103 op1 = machopic_legitimize_pic_address (op1, mode,
8104 temp == op1 ? 0 : temp);
8108 if (MACHOPIC_INDIRECT)
8109 op1 = machopic_indirect_data_reference (op1, 0);
8113 insn = gen_rtx_SET (VOIDmode, op0, op1);
8117 #endif /* TARGET_MACHO */
8118 if (GET_CODE (op0) == MEM)
8119 op1 = force_reg (Pmode, op1);
8123 if (GET_CODE (temp) != REG)
8124 temp = gen_reg_rtx (Pmode);
8125 temp = legitimize_pic_address (op1, temp);
8133 if (GET_CODE (op0) == MEM
8134 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8135 || !push_operand (op0, mode))
8136 && GET_CODE (op1) == MEM)
8137 op1 = force_reg (mode, op1);
8139 if (push_operand (op0, mode)
8140 && ! general_no_elim_operand (op1, mode))
8141 op1 = copy_to_mode_reg (mode, op1);
8143 /* Force large constants in 64bit compilation into register
8144 to get them CSEed. */
8145 if (TARGET_64BIT && mode == DImode
8146 && immediate_operand (op1, mode)
8147 && !x86_64_zero_extended_value (op1)
8148 && !register_operand (op0, mode)
8149 && optimize && !reload_completed && !reload_in_progress)
8150 op1 = copy_to_mode_reg (mode, op1);
8152 if (FLOAT_MODE_P (mode))
8154 /* If we are loading a floating point constant to a register,
8155 force the value to memory now, since we'll get better code
8156 out the back end. */
8160 else if (GET_CODE (op1) == CONST_DOUBLE
8161 && register_operand (op0, mode))
8162 op1 = validize_mem (force_const_mem (mode, op1));
8166 insn = gen_rtx_SET (VOIDmode, op0, op1);
8172 ix86_expand_vector_move (mode, operands)
8173 enum machine_mode mode;
8176 /* Force constants other than zero into memory. We do not know how
8177 the instructions used to build constants modify the upper 64 bits
8178 of the register, once we have that information we may be able
8179 to handle some of them more efficiently. */
8180 if ((reload_in_progress | reload_completed) == 0
8181 && register_operand (operands[0], mode)
8182 && CONSTANT_P (operands[1]))
8183 operands[1] = validize_mem (force_const_mem (mode, operands[1]));
8185 /* Make operand1 a register if it isn't already. */
8187 && !register_operand (operands[0], mode)
8188 && !register_operand (operands[1], mode))
8190 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8191 emit_move_insn (operands[0], temp);
8195 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8198 /* Attempt to expand a binary operator. Make the expansion closer to the
8199 actual machine, then just general_operand, which will allow 3 separate
8200 memory references (one output, two input) in a single insn. */
8203 ix86_expand_binary_operator (code, mode, operands)
8205 enum machine_mode mode;
8208 int matching_memory;
8209 rtx src1, src2, dst, op, clob;
8215 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8216 if (GET_RTX_CLASS (code) == 'c'
8217 && (rtx_equal_p (dst, src2)
8218 || immediate_operand (src1, mode)))
8225 /* If the destination is memory, and we do not have matching source
8226 operands, do things in registers. */
8227 matching_memory = 0;
8228 if (GET_CODE (dst) == MEM)
8230 if (rtx_equal_p (dst, src1))
8231 matching_memory = 1;
8232 else if (GET_RTX_CLASS (code) == 'c'
8233 && rtx_equal_p (dst, src2))
8234 matching_memory = 2;
8236 dst = gen_reg_rtx (mode);
8239 /* Both source operands cannot be in memory. */
8240 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8242 if (matching_memory != 2)
8243 src2 = force_reg (mode, src2);
8245 src1 = force_reg (mode, src1);
8248 /* If the operation is not commutable, source 1 cannot be a constant
8249 or non-matching memory. */
8250 if ((CONSTANT_P (src1)
8251 || (!matching_memory && GET_CODE (src1) == MEM))
8252 && GET_RTX_CLASS (code) != 'c')
8253 src1 = force_reg (mode, src1);
8255 /* If optimizing, copy to regs to improve CSE */
8256 if (optimize && ! no_new_pseudos)
8258 if (GET_CODE (dst) == MEM)
8259 dst = gen_reg_rtx (mode);
8260 if (GET_CODE (src1) == MEM)
8261 src1 = force_reg (mode, src1);
8262 if (GET_CODE (src2) == MEM)
8263 src2 = force_reg (mode, src2);
8266 /* Emit the instruction. */
8268 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8269 if (reload_in_progress)
8271 /* Reload doesn't know about the flags register, and doesn't know that
8272 it doesn't want to clobber it. We can only do this with PLUS. */
8279 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8280 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8283 /* Fix up the destination if needed. */
8284 if (dst != operands[0])
8285 emit_move_insn (operands[0], dst);
8288 /* Return TRUE or FALSE depending on whether the binary operator meets the
8289 appropriate constraints. */
8292 ix86_binary_operator_ok (code, mode, operands)
8294 enum machine_mode mode ATTRIBUTE_UNUSED;
8297 /* Both source operands cannot be in memory. */
8298 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8300 /* If the operation is not commutable, source 1 cannot be a constant. */
8301 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
8303 /* If the destination is memory, we must have a matching source operand. */
8304 if (GET_CODE (operands[0]) == MEM
8305 && ! (rtx_equal_p (operands[0], operands[1])
8306 || (GET_RTX_CLASS (code) == 'c'
8307 && rtx_equal_p (operands[0], operands[2]))))
8309 /* If the operation is not commutable and the source 1 is memory, we must
8310 have a matching destination. */
8311 if (GET_CODE (operands[1]) == MEM
8312 && GET_RTX_CLASS (code) != 'c'
8313 && ! rtx_equal_p (operands[0], operands[1]))
8318 /* Attempt to expand a unary operator. Make the expansion closer to the
8319 actual machine, then just general_operand, which will allow 2 separate
8320 memory references (one output, one input) in a single insn. */
8323 ix86_expand_unary_operator (code, mode, operands)
8325 enum machine_mode mode;
8328 int matching_memory;
8329 rtx src, dst, op, clob;
8334 /* If the destination is memory, and we do not have matching source
8335 operands, do things in registers. */
8336 matching_memory = 0;
8337 if (GET_CODE (dst) == MEM)
8339 if (rtx_equal_p (dst, src))
8340 matching_memory = 1;
8342 dst = gen_reg_rtx (mode);
8345 /* When source operand is memory, destination must match. */
8346 if (!matching_memory && GET_CODE (src) == MEM)
8347 src = force_reg (mode, src);
8349 /* If optimizing, copy to regs to improve CSE */
8350 if (optimize && ! no_new_pseudos)
8352 if (GET_CODE (dst) == MEM)
8353 dst = gen_reg_rtx (mode);
8354 if (GET_CODE (src) == MEM)
8355 src = force_reg (mode, src);
8358 /* Emit the instruction. */
8360 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8361 if (reload_in_progress || code == NOT)
8363 /* Reload doesn't know about the flags register, and doesn't know that
8364 it doesn't want to clobber it. */
8371 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8372 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8375 /* Fix up the destination if needed. */
8376 if (dst != operands[0])
8377 emit_move_insn (operands[0], dst);
8380 /* Return TRUE or FALSE depending on whether the unary operator meets the
8381 appropriate constraints. */
8384 ix86_unary_operator_ok (code, mode, operands)
8385 enum rtx_code code ATTRIBUTE_UNUSED;
8386 enum machine_mode mode ATTRIBUTE_UNUSED;
8387 rtx operands[2] ATTRIBUTE_UNUSED;
8389 /* If one of operands is memory, source and destination must match. */
8390 if ((GET_CODE (operands[0]) == MEM
8391 || GET_CODE (operands[1]) == MEM)
8392 && ! rtx_equal_p (operands[0], operands[1]))
8397 /* Return TRUE or FALSE depending on whether the first SET in INSN
8398 has source and destination with matching CC modes, and that the
8399 CC mode is at least as constrained as REQ_MODE. */
8402 ix86_match_ccmode (insn, req_mode)
8404 enum machine_mode req_mode;
8407 enum machine_mode set_mode;
8409 set = PATTERN (insn);
8410 if (GET_CODE (set) == PARALLEL)
8411 set = XVECEXP (set, 0, 0);
8412 if (GET_CODE (set) != SET)
8414 if (GET_CODE (SET_SRC (set)) != COMPARE)
8417 set_mode = GET_MODE (SET_DEST (set));
8421 if (req_mode != CCNOmode
8422 && (req_mode != CCmode
8423 || XEXP (SET_SRC (set), 1) != const0_rtx))
8427 if (req_mode == CCGCmode)
8431 if (req_mode == CCGOCmode || req_mode == CCNOmode)
8435 if (req_mode == CCZmode)
8445 return (GET_MODE (SET_SRC (set)) == set_mode);
8448 /* Generate insn patterns to do an integer compare of OPERANDS. */
8451 ix86_expand_int_compare (code, op0, op1)
8455 enum machine_mode cmpmode;
8458 cmpmode = SELECT_CC_MODE (code, op0, op1);
8459 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8461 /* This is very simple, but making the interface the same as in the
8462 FP case makes the rest of the code easier. */
8463 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8464 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8466 /* Return the test that should be put into the flags user, i.e.
8467 the bcc, scc, or cmov instruction. */
8468 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8471 /* Figure out whether to use ordered or unordered fp comparisons.
8472 Return the appropriate mode to use. */
8475 ix86_fp_compare_mode (code)
8476 enum rtx_code code ATTRIBUTE_UNUSED;
8478 /* ??? In order to make all comparisons reversible, we do all comparisons
8479 non-trapping when compiling for IEEE. Once gcc is able to distinguish
8480 all forms trapping and nontrapping comparisons, we can make inequality
8481 comparisons trapping again, since it results in better code when using
8482 FCOM based compares. */
8483 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8487 ix86_cc_mode (code, op0, op1)
8491 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8492 return ix86_fp_compare_mode (code);
8495 /* Only zero flag is needed. */
8497 case NE: /* ZF!=0 */
8499 /* Codes needing carry flag. */
8500 case GEU: /* CF=0 */
8501 case GTU: /* CF=0 & ZF=0 */
8502 case LTU: /* CF=1 */
8503 case LEU: /* CF=1 | ZF=1 */
8505 /* Codes possibly doable only with sign flag when
8506 comparing against zero. */
8507 case GE: /* SF=OF or SF=0 */
8508 case LT: /* SF<>OF or SF=1 */
8509 if (op1 == const0_rtx)
8512 /* For other cases Carry flag is not required. */
8514 /* Codes doable only with sign flag when comparing
8515 against zero, but we miss jump instruction for it
8516 so we need to use relational tests against overflow
8517 that thus needs to be zero. */
8518 case GT: /* ZF=0 & SF=OF */
8519 case LE: /* ZF=1 | SF<>OF */
8520 if (op1 == const0_rtx)
8524 /* strcmp pattern do (use flags) and combine may ask us for proper
8533 /* Return true if we should use an FCOMI instruction for this fp comparison. */
8536 ix86_use_fcomi_compare (code)
8537 enum rtx_code code ATTRIBUTE_UNUSED;
8539 enum rtx_code swapped_code = swap_condition (code);
8540 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8541 || (ix86_fp_comparison_cost (swapped_code)
8542 == ix86_fp_comparison_fcomi_cost (swapped_code)));
8545 /* Swap, force into registers, or otherwise massage the two operands
8546 to a fp comparison. The operands are updated in place; the new
8547 comparison code is returned. */
8549 static enum rtx_code
8550 ix86_prepare_fp_compare_args (code, pop0, pop1)
8554 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8555 rtx op0 = *pop0, op1 = *pop1;
8556 enum machine_mode op_mode = GET_MODE (op0);
8557 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8559 /* All of the unordered compare instructions only work on registers.
8560 The same is true of the XFmode compare instructions. The same is
8561 true of the fcomi compare instructions. */
8564 && (fpcmp_mode == CCFPUmode
8565 || op_mode == XFmode
8566 || op_mode == TFmode
8567 || ix86_use_fcomi_compare (code)))
8569 op0 = force_reg (op_mode, op0);
8570 op1 = force_reg (op_mode, op1);
8574 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8575 things around if they appear profitable, otherwise force op0
8578 if (standard_80387_constant_p (op0) == 0
8579 || (GET_CODE (op0) == MEM
8580 && ! (standard_80387_constant_p (op1) == 0
8581 || GET_CODE (op1) == MEM)))
8584 tmp = op0, op0 = op1, op1 = tmp;
8585 code = swap_condition (code);
8588 if (GET_CODE (op0) != REG)
8589 op0 = force_reg (op_mode, op0);
8591 if (CONSTANT_P (op1))
8593 if (standard_80387_constant_p (op1))
8594 op1 = force_reg (op_mode, op1);
8596 op1 = validize_mem (force_const_mem (op_mode, op1));
8600 /* Try to rearrange the comparison to make it cheaper. */
8601 if (ix86_fp_comparison_cost (code)
8602 > ix86_fp_comparison_cost (swap_condition (code))
8603 && (GET_CODE (op1) == REG || !no_new_pseudos))
8606 tmp = op0, op0 = op1, op1 = tmp;
8607 code = swap_condition (code);
8608 if (GET_CODE (op0) != REG)
8609 op0 = force_reg (op_mode, op0);
8617 /* Convert comparison codes we use to represent FP comparison to integer
8618 code that will result in proper branch. Return UNKNOWN if no such code
8620 static enum rtx_code
8621 ix86_fp_compare_code_to_integer (code)
8651 /* Split comparison code CODE into comparisons we can do using branch
8652 instructions. BYPASS_CODE is comparison code for branch that will
8653 branch around FIRST_CODE and SECOND_CODE. If some of branches
8654 is not required, set value to NIL.
8655 We never require more than two branches. */
8657 ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
8658 enum rtx_code code, *bypass_code, *first_code, *second_code;
8664 /* The fcomi comparison sets flags as follows:
8674 case GT: /* GTU - CF=0 & ZF=0 */
8675 case GE: /* GEU - CF=0 */
8676 case ORDERED: /* PF=0 */
8677 case UNORDERED: /* PF=1 */
8678 case UNEQ: /* EQ - ZF=1 */
8679 case UNLT: /* LTU - CF=1 */
8680 case UNLE: /* LEU - CF=1 | ZF=1 */
8681 case LTGT: /* EQ - ZF=0 */
8683 case LT: /* LTU - CF=1 - fails on unordered */
8685 *bypass_code = UNORDERED;
8687 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8689 *bypass_code = UNORDERED;
8691 case EQ: /* EQ - ZF=1 - fails on unordered */
8693 *bypass_code = UNORDERED;
8695 case NE: /* NE - ZF=0 - fails on unordered */
8697 *second_code = UNORDERED;
8699 case UNGE: /* GEU - CF=0 - fails on unordered */
8701 *second_code = UNORDERED;
8703 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8705 *second_code = UNORDERED;
8710 if (!TARGET_IEEE_FP)
8717 /* Return cost of comparison done fcom + arithmetics operations on AX.
8718 All following functions do use number of instructions as a cost metrics.
8719 In future this should be tweaked to compute bytes for optimize_size and
8720 take into account performance of various instructions on various CPUs. */
8722 ix86_fp_comparison_arithmetics_cost (code)
8725 if (!TARGET_IEEE_FP)
8727 /* The cost of code output by ix86_expand_fp_compare. */
8755 /* Return cost of comparison done using fcomi operation.
8756 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8758 ix86_fp_comparison_fcomi_cost (code)
8761 enum rtx_code bypass_code, first_code, second_code;
8762 /* Return arbitrarily high cost when instruction is not supported - this
8763 prevents gcc from using it. */
8766 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8767 return (bypass_code != NIL || second_code != NIL) + 2;
8770 /* Return cost of comparison done using sahf operation.
8771 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8773 ix86_fp_comparison_sahf_cost (code)
8776 enum rtx_code bypass_code, first_code, second_code;
8777 /* Return arbitrarily high cost when instruction is not preferred - this
8778 avoids gcc from using it. */
8779 if (!TARGET_USE_SAHF && !optimize_size)
8781 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8782 return (bypass_code != NIL || second_code != NIL) + 3;
8785 /* Compute cost of the comparison done using any method.
8786 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8788 ix86_fp_comparison_cost (code)
8791 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8794 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8795 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8797 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8798 if (min > sahf_cost)
8800 if (min > fcomi_cost)
8805 /* Generate insn patterns to do a floating point compare of OPERANDS. */
8808 ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
8810 rtx op0, op1, scratch;
8814 enum machine_mode fpcmp_mode, intcmp_mode;
8816 int cost = ix86_fp_comparison_cost (code);
8817 enum rtx_code bypass_code, first_code, second_code;
8819 fpcmp_mode = ix86_fp_compare_mode (code);
8820 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8823 *second_test = NULL_RTX;
8825 *bypass_test = NULL_RTX;
8827 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8829 /* Do fcomi/sahf based test when profitable. */
8830 if ((bypass_code == NIL || bypass_test)
8831 && (second_code == NIL || second_test)
8832 && ix86_fp_comparison_arithmetics_cost (code) > cost)
8836 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8837 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8843 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8844 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8846 scratch = gen_reg_rtx (HImode);
8847 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8848 emit_insn (gen_x86_sahf_1 (scratch));
8851 /* The FP codes work out to act like unsigned. */
8852 intcmp_mode = fpcmp_mode;
8854 if (bypass_code != NIL)
8855 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8856 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8858 if (second_code != NIL)
8859 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8860 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8865 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
8866 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8867 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
8869 scratch = gen_reg_rtx (HImode);
8870 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8872 /* In the unordered case, we have to check C2 for NaN's, which
8873 doesn't happen to work out to anything nice combination-wise.
8874 So do some bit twiddling on the value we've got in AH to come
8875 up with an appropriate set of condition codes. */
8877 intcmp_mode = CCNOmode;
8882 if (code == GT || !TARGET_IEEE_FP)
8884 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8889 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8890 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8891 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8892 intcmp_mode = CCmode;
8898 if (code == LT && TARGET_IEEE_FP)
8900 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8901 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
8902 intcmp_mode = CCmode;
8907 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8913 if (code == GE || !TARGET_IEEE_FP)
8915 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
8920 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8921 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8928 if (code == LE && TARGET_IEEE_FP)
8930 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8931 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8932 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8933 intcmp_mode = CCmode;
8938 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8944 if (code == EQ && TARGET_IEEE_FP)
8946 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8947 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
8948 intcmp_mode = CCmode;
8953 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8960 if (code == NE && TARGET_IEEE_FP)
8962 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8963 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8969 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8975 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8979 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8988 /* Return the test that should be put into the flags user, i.e.
8989 the bcc, scc, or cmov instruction. */
8990 return gen_rtx_fmt_ee (code, VOIDmode,
8991 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8996 ix86_expand_compare (code, second_test, bypass_test)
8998 rtx *second_test, *bypass_test;
9001 op0 = ix86_compare_op0;
9002 op1 = ix86_compare_op1;
9005 *second_test = NULL_RTX;
9007 *bypass_test = NULL_RTX;
9009 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9010 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9011 second_test, bypass_test);
9013 ret = ix86_expand_int_compare (code, op0, op1);
9018 /* Return true if the CODE will result in nontrivial jump sequence. */
9020 ix86_fp_jump_nontrivial_p (code)
9023 enum rtx_code bypass_code, first_code, second_code;
9026 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9027 return bypass_code != NIL || second_code != NIL;
9031 ix86_expand_branch (code, label)
9037 switch (GET_MODE (ix86_compare_op0))
9043 tmp = ix86_expand_compare (code, NULL, NULL);
9044 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9045 gen_rtx_LABEL_REF (VOIDmode, label),
9047 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9057 enum rtx_code bypass_code, first_code, second_code;
9059 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9062 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9064 /* Check whether we will use the natural sequence with one jump. If
9065 so, we can expand jump early. Otherwise delay expansion by
9066 creating compound insn to not confuse optimizers. */
9067 if (bypass_code == NIL && second_code == NIL
9070 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9071 gen_rtx_LABEL_REF (VOIDmode, label),
9076 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9077 ix86_compare_op0, ix86_compare_op1);
9078 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9079 gen_rtx_LABEL_REF (VOIDmode, label),
9081 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9083 use_fcomi = ix86_use_fcomi_compare (code);
9084 vec = rtvec_alloc (3 + !use_fcomi);
9085 RTVEC_ELT (vec, 0) = tmp;
9087 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9089 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9092 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9094 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9102 /* Expand DImode branch into multiple compare+branch. */
9104 rtx lo[2], hi[2], label2;
9105 enum rtx_code code1, code2, code3;
9107 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9109 tmp = ix86_compare_op0;
9110 ix86_compare_op0 = ix86_compare_op1;
9111 ix86_compare_op1 = tmp;
9112 code = swap_condition (code);
9114 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9115 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9117 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9118 avoid two branches. This costs one extra insn, so disable when
9119 optimizing for size. */
9121 if ((code == EQ || code == NE)
9123 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9128 if (hi[1] != const0_rtx)
9129 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9130 NULL_RTX, 0, OPTAB_WIDEN);
9133 if (lo[1] != const0_rtx)
9134 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9135 NULL_RTX, 0, OPTAB_WIDEN);
9137 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9138 NULL_RTX, 0, OPTAB_WIDEN);
9140 ix86_compare_op0 = tmp;
9141 ix86_compare_op1 = const0_rtx;
9142 ix86_expand_branch (code, label);
9146 /* Otherwise, if we are doing less-than or greater-or-equal-than,
9147 op1 is a constant and the low word is zero, then we can just
9148 examine the high word. */
9150 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9153 case LT: case LTU: case GE: case GEU:
9154 ix86_compare_op0 = hi[0];
9155 ix86_compare_op1 = hi[1];
9156 ix86_expand_branch (code, label);
9162 /* Otherwise, we need two or three jumps. */
9164 label2 = gen_label_rtx ();
9167 code2 = swap_condition (code);
9168 code3 = unsigned_condition (code);
9172 case LT: case GT: case LTU: case GTU:
9175 case LE: code1 = LT; code2 = GT; break;
9176 case GE: code1 = GT; code2 = LT; break;
9177 case LEU: code1 = LTU; code2 = GTU; break;
9178 case GEU: code1 = GTU; code2 = LTU; break;
9180 case EQ: code1 = NIL; code2 = NE; break;
9181 case NE: code2 = NIL; break;
9189 * if (hi(a) < hi(b)) goto true;
9190 * if (hi(a) > hi(b)) goto false;
9191 * if (lo(a) < lo(b)) goto true;
9195 ix86_compare_op0 = hi[0];
9196 ix86_compare_op1 = hi[1];
9199 ix86_expand_branch (code1, label);
9201 ix86_expand_branch (code2, label2);
9203 ix86_compare_op0 = lo[0];
9204 ix86_compare_op1 = lo[1];
9205 ix86_expand_branch (code3, label);
9208 emit_label (label2);
9217 /* Split branch based on floating point condition. */
9219 ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
9221 rtx op1, op2, target1, target2, tmp;
9224 rtx label = NULL_RTX;
9226 int bypass_probability = -1, second_probability = -1, probability = -1;
9229 if (target2 != pc_rtx)
9232 code = reverse_condition_maybe_unordered (code);
9237 condition = ix86_expand_fp_compare (code, op1, op2,
9238 tmp, &second, &bypass);
9240 if (split_branch_probability >= 0)
9242 /* Distribute the probabilities across the jumps.
9243 Assume the BYPASS and SECOND to be always test
9245 probability = split_branch_probability;
9247 /* Value of 1 is low enough to make no need for probability
9248 to be updated. Later we may run some experiments and see
9249 if unordered values are more frequent in practice. */
9251 bypass_probability = 1;
9253 second_probability = 1;
9255 if (bypass != NULL_RTX)
9257 label = gen_label_rtx ();
9258 i = emit_jump_insn (gen_rtx_SET
9260 gen_rtx_IF_THEN_ELSE (VOIDmode,
9262 gen_rtx_LABEL_REF (VOIDmode,
9265 if (bypass_probability >= 0)
9267 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9268 GEN_INT (bypass_probability),
9271 i = emit_jump_insn (gen_rtx_SET
9273 gen_rtx_IF_THEN_ELSE (VOIDmode,
9274 condition, target1, target2)));
9275 if (probability >= 0)
9277 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9278 GEN_INT (probability),
9280 if (second != NULL_RTX)
9282 i = emit_jump_insn (gen_rtx_SET
9284 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9286 if (second_probability >= 0)
9288 = gen_rtx_EXPR_LIST (REG_BR_PROB,
9289 GEN_INT (second_probability),
9292 if (label != NULL_RTX)
9297 ix86_expand_setcc (code, dest)
9301 rtx ret, tmp, tmpreg;
9302 rtx second_test, bypass_test;
9304 if (GET_MODE (ix86_compare_op0) == DImode
9306 return 0; /* FAIL */
9308 if (GET_MODE (dest) != QImode)
9311 ret = ix86_expand_compare (code, &second_test, &bypass_test);
9312 PUT_MODE (ret, QImode);
9317 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9318 if (bypass_test || second_test)
9320 rtx test = second_test;
9322 rtx tmp2 = gen_reg_rtx (QImode);
9329 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9331 PUT_MODE (test, QImode);
9332 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9335 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9337 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9340 return 1; /* DONE */
9343 /* Expand comparison setting or clearing carry flag. Return true when successful
9344 and set pop for the operation. */
9346 ix86_expand_carry_flag_compare (code, op0, op1, pop)
9350 enum machine_mode mode =
9351 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9353 /* Do not handle DImode compares that go trought special path. Also we can't
9354 deal with FP compares yet. This is possible to add. */
9355 if ((mode == DImode && !TARGET_64BIT))
9357 if (FLOAT_MODE_P (mode))
9359 rtx second_test = NULL, bypass_test = NULL;
9360 rtx compare_op, compare_seq;
9362 /* Shortcut: following common codes never translate into carry flag compares. */
9363 if (code == EQ || code == NE || code == UNEQ || code == LTGT
9364 || code == ORDERED || code == UNORDERED)
9367 /* These comparisons require zero flag; swap operands so they won't. */
9368 if ((code == GT || code == UNLE || code == LE || code == UNGT)
9374 code = swap_condition (code);
9377 /* Try to expand the comparsion and verify that we end up with carry flag
9378 based comparsion. This is fails to be true only when we decide to expand
9379 comparsion using arithmetic that is not too common scenario. */
9381 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9382 &second_test, &bypass_test);
9383 compare_seq = get_insns ();
9386 if (second_test || bypass_test)
9388 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9389 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9390 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9392 code = GET_CODE (compare_op);
9393 if (code != LTU && code != GEU)
9395 emit_insn (compare_seq);
9399 if (!INTEGRAL_MODE_P (mode))
9407 /* Convert a==0 into (unsigned)a<1. */
9410 if (op1 != const0_rtx)
9413 code = (code == EQ ? LTU : GEU);
9416 /* Convert a>b into b<a or a>=b-1. */
9419 if (GET_CODE (op1) == CONST_INT)
9421 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9422 /* Bail out on overflow. We still can swap operands but that
9423 would force loading of the constant into register. */
9424 if (op1 == const0_rtx
9425 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9427 code = (code == GTU ? GEU : LTU);
9434 code = (code == GTU ? LTU : GEU);
9438 /* Convert a>0 into (unsigned)a<0x7fffffff. */
9441 if (mode == DImode || op1 != const0_rtx)
9443 op1 = gen_int_mode (~(1 << (GET_MODE_BITSIZE (mode) - 1)), mode);
9444 code = (code == LT ? GEU : LTU);
9448 if (mode == DImode || op1 != constm1_rtx)
9450 op1 = gen_int_mode (~(1 << (GET_MODE_BITSIZE (mode) - 1)), mode);
9451 code = (code == LE ? GEU : LTU);
9457 ix86_compare_op0 = op0;
9458 ix86_compare_op1 = op1;
9459 *pop = ix86_expand_compare (code, NULL, NULL);
9460 if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9466 ix86_expand_int_movcc (operands)
9469 enum rtx_code code = GET_CODE (operands[1]), compare_code;
9470 rtx compare_seq, compare_op;
9471 rtx second_test, bypass_test;
9472 enum machine_mode mode = GET_MODE (operands[0]);
9473 bool sign_bit_compare_p = false;;
9476 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9477 compare_seq = get_insns ();
9480 compare_code = GET_CODE (compare_op);
9482 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9483 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9484 sign_bit_compare_p = true;
9486 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9487 HImode insns, we'd be swallowed in word prefix ops. */
9489 if ((mode != HImode || TARGET_FAST_PREFIX)
9490 && (mode != DImode || TARGET_64BIT)
9491 && GET_CODE (operands[2]) == CONST_INT
9492 && GET_CODE (operands[3]) == CONST_INT)
9494 rtx out = operands[0];
9495 HOST_WIDE_INT ct = INTVAL (operands[2]);
9496 HOST_WIDE_INT cf = INTVAL (operands[3]);
9500 /* Sign bit compares are better done using shifts than we do by using
9502 if (sign_bit_compare_p
9503 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9504 ix86_compare_op1, &compare_op))
9506 /* Detect overlap between destination and compare sources. */
9509 if (!sign_bit_compare_p)
9513 compare_code = GET_CODE (compare_op);
9515 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9516 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9519 compare_code = ix86_fp_compare_code_to_integer (compare_code);
9522 /* To simplify rest of code, restrict to the GEU case. */
9523 if (compare_code == LTU)
9525 HOST_WIDE_INT tmp = ct;
9528 compare_code = reverse_condition (compare_code);
9529 code = reverse_condition (code);
9534 PUT_CODE (compare_op,
9535 reverse_condition_maybe_unordered
9536 (GET_CODE (compare_op)));
9538 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9542 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9543 || reg_overlap_mentioned_p (out, ix86_compare_op1))
9544 tmp = gen_reg_rtx (mode);
9547 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9549 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9553 if (code == GT || code == GE)
9554 code = reverse_condition (code);
9557 HOST_WIDE_INT tmp = ct;
9561 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9562 ix86_compare_op1, VOIDmode, 0, -1);
9575 tmp = expand_simple_binop (mode, PLUS,
9577 copy_rtx (tmp), 1, OPTAB_DIRECT);
9588 tmp = expand_simple_binop (mode, IOR,
9590 copy_rtx (tmp), 1, OPTAB_DIRECT);
9592 else if (diff == -1 && ct)
9602 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9604 tmp = expand_simple_binop (mode, PLUS,
9605 copy_rtx (tmp), GEN_INT (cf),
9606 copy_rtx (tmp), 1, OPTAB_DIRECT);
9614 * andl cf - ct, dest
9624 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9627 tmp = expand_simple_binop (mode, AND,
9629 gen_int_mode (cf - ct, mode),
9630 copy_rtx (tmp), 1, OPTAB_DIRECT);
9632 tmp = expand_simple_binop (mode, PLUS,
9633 copy_rtx (tmp), GEN_INT (ct),
9634 copy_rtx (tmp), 1, OPTAB_DIRECT);
9637 if (!rtx_equal_p (tmp, out))
9638 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9640 return 1; /* DONE */
9646 tmp = ct, ct = cf, cf = tmp;
9648 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9650 /* We may be reversing unordered compare to normal compare, that
9651 is not valid in general (we may convert non-trapping condition
9652 to trapping one), however on i386 we currently emit all
9653 comparisons unordered. */
9654 compare_code = reverse_condition_maybe_unordered (compare_code);
9655 code = reverse_condition_maybe_unordered (code);
9659 compare_code = reverse_condition (compare_code);
9660 code = reverse_condition (code);
9665 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9666 && GET_CODE (ix86_compare_op1) == CONST_INT)
9668 if (ix86_compare_op1 == const0_rtx
9669 && (code == LT || code == GE))
9670 compare_code = code;
9671 else if (ix86_compare_op1 == constm1_rtx)
9675 else if (code == GT)
9680 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9681 if (compare_code != NIL
9682 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9683 && (cf == -1 || ct == -1))
9685 /* If lea code below could be used, only optimize
9686 if it results in a 2 insn sequence. */
9688 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9689 || diff == 3 || diff == 5 || diff == 9)
9690 || (compare_code == LT && ct == -1)
9691 || (compare_code == GE && cf == -1))
9694 * notl op1 (if necessary)
9702 code = reverse_condition (code);
9705 out = emit_store_flag (out, code, ix86_compare_op0,
9706 ix86_compare_op1, VOIDmode, 0, -1);
9708 out = expand_simple_binop (mode, IOR,
9710 out, 1, OPTAB_DIRECT);
9711 if (out != operands[0])
9712 emit_move_insn (operands[0], out);
9714 return 1; /* DONE */
9719 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9720 || diff == 3 || diff == 5 || diff == 9)
9721 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
9722 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
9728 * lea cf(dest*(ct-cf)),dest
9732 * This also catches the degenerate setcc-only case.
9738 out = emit_store_flag (out, code, ix86_compare_op0,
9739 ix86_compare_op1, VOIDmode, 0, 1);
9742 /* On x86_64 the lea instruction operates on Pmode, so we need
9743 to get arithmetics done in proper mode to match. */
9745 tmp = copy_rtx (out);
9749 out1 = copy_rtx (out);
9750 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
9754 tmp = gen_rtx_PLUS (mode, tmp, out1);
9760 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
9763 if (!rtx_equal_p (tmp, out))
9766 out = force_operand (tmp, copy_rtx (out));
9768 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
9770 if (!rtx_equal_p (out, operands[0]))
9771 emit_move_insn (operands[0], copy_rtx (out));
9773 return 1; /* DONE */
9777 * General case: Jumpful:
9778 * xorl dest,dest cmpl op1, op2
9779 * cmpl op1, op2 movl ct, dest
9781 * decl dest movl cf, dest
9782 * andl (cf-ct),dest 1:
9787 * This is reasonably steep, but branch mispredict costs are
9788 * high on modern cpus, so consider failing only if optimizing
9792 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9793 && BRANCH_COST >= 2)
9799 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9800 /* We may be reversing unordered compare to normal compare,
9801 that is not valid in general (we may convert non-trapping
9802 condition to trapping one), however on i386 we currently
9803 emit all comparisons unordered. */
9804 code = reverse_condition_maybe_unordered (code);
9807 code = reverse_condition (code);
9808 if (compare_code != NIL)
9809 compare_code = reverse_condition (compare_code);
9813 if (compare_code != NIL)
9815 /* notl op1 (if needed)
9820 For x < 0 (resp. x <= -1) there will be no notl,
9821 so if possible swap the constants to get rid of the
9823 True/false will be -1/0 while code below (store flag
9824 followed by decrement) is 0/-1, so the constants need
9825 to be exchanged once more. */
9827 if (compare_code == GE || !cf)
9829 code = reverse_condition (code);
9834 HOST_WIDE_INT tmp = cf;
9839 out = emit_store_flag (out, code, ix86_compare_op0,
9840 ix86_compare_op1, VOIDmode, 0, -1);
9844 out = emit_store_flag (out, code, ix86_compare_op0,
9845 ix86_compare_op1, VOIDmode, 0, 1);
9847 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
9848 copy_rtx (out), 1, OPTAB_DIRECT);
9851 out = expand_simple_binop (mode, AND, copy_rtx (out),
9852 gen_int_mode (cf - ct, mode),
9853 copy_rtx (out), 1, OPTAB_DIRECT);
9855 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
9856 copy_rtx (out), 1, OPTAB_DIRECT);
9857 if (!rtx_equal_p (out, operands[0]))
9858 emit_move_insn (operands[0], copy_rtx (out));
9860 return 1; /* DONE */
9864 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
9866 /* Try a few things more with specific constants and a variable. */
9869 rtx var, orig_out, out, tmp;
9871 if (BRANCH_COST <= 2)
9872 return 0; /* FAIL */
9874 /* If one of the two operands is an interesting constant, load a
9875 constant with the above and mask it in with a logical operation. */
9877 if (GET_CODE (operands[2]) == CONST_INT)
9880 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
9881 operands[3] = constm1_rtx, op = and_optab;
9882 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
9883 operands[3] = const0_rtx, op = ior_optab;
9885 return 0; /* FAIL */
9887 else if (GET_CODE (operands[3]) == CONST_INT)
9890 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
9891 operands[2] = constm1_rtx, op = and_optab;
9892 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
9893 operands[2] = const0_rtx, op = ior_optab;
9895 return 0; /* FAIL */
9898 return 0; /* FAIL */
9900 orig_out = operands[0];
9901 tmp = gen_reg_rtx (mode);
9904 /* Recurse to get the constant loaded. */
9905 if (ix86_expand_int_movcc (operands) == 0)
9906 return 0; /* FAIL */
9908 /* Mask in the interesting variable. */
9909 out = expand_binop (mode, op, var, tmp, orig_out, 0,
9911 if (!rtx_equal_p (out, orig_out))
9912 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
9914 return 1; /* DONE */
9918 * For comparison with above,
9928 if (! nonimmediate_operand (operands[2], mode))
9929 operands[2] = force_reg (mode, operands[2]);
9930 if (! nonimmediate_operand (operands[3], mode))
9931 operands[3] = force_reg (mode, operands[3]);
9933 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9935 rtx tmp = gen_reg_rtx (mode);
9936 emit_move_insn (tmp, operands[3]);
9939 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9941 rtx tmp = gen_reg_rtx (mode);
9942 emit_move_insn (tmp, operands[2]);
9946 if (! register_operand (operands[2], VOIDmode)
9948 || ! register_operand (operands[3], VOIDmode)))
9949 operands[2] = force_reg (mode, operands[2]);
9952 && ! register_operand (operands[3], VOIDmode))
9953 operands[3] = force_reg (mode, operands[3]);
9955 emit_insn (compare_seq);
9956 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9957 gen_rtx_IF_THEN_ELSE (mode,
9958 compare_op, operands[2],
9961 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9962 gen_rtx_IF_THEN_ELSE (mode,
9964 copy_rtx (operands[3]),
9965 copy_rtx (operands[0]))));
9967 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
9968 gen_rtx_IF_THEN_ELSE (mode,
9970 copy_rtx (operands[2]),
9971 copy_rtx (operands[0]))));
9973 return 1; /* DONE */
9977 ix86_expand_fp_movcc (operands)
9982 rtx compare_op, second_test, bypass_test;
9984 /* For SF/DFmode conditional moves based on comparisons
9985 in same mode, we may want to use SSE min/max instructions. */
9986 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9987 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
9988 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
9989 /* The SSE comparisons does not support the LTGT/UNEQ pair. */
9991 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
9992 /* We may be called from the post-reload splitter. */
9993 && (!REG_P (operands[0])
9994 || SSE_REG_P (operands[0])
9995 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
9997 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9998 code = GET_CODE (operands[1]);
10000 /* See if we have (cross) match between comparison operands and
10001 conditional move operands. */
10002 if (rtx_equal_p (operands[2], op1))
10007 code = reverse_condition_maybe_unordered (code);
10009 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
10011 /* Check for min operation. */
10012 if (code == LT || code == UNLE)
10020 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10021 if (memory_operand (op0, VOIDmode))
10022 op0 = force_reg (GET_MODE (operands[0]), op0);
10023 if (GET_MODE (operands[0]) == SFmode)
10024 emit_insn (gen_minsf3 (operands[0], op0, op1));
10026 emit_insn (gen_mindf3 (operands[0], op0, op1));
10029 /* Check for max operation. */
10030 if (code == GT || code == UNGE)
10038 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10039 if (memory_operand (op0, VOIDmode))
10040 op0 = force_reg (GET_MODE (operands[0]), op0);
10041 if (GET_MODE (operands[0]) == SFmode)
10042 emit_insn (gen_maxsf3 (operands[0], op0, op1));
10044 emit_insn (gen_maxdf3 (operands[0], op0, op1));
10048 /* Manage condition to be sse_comparison_operator. In case we are
10049 in non-ieee mode, try to canonicalize the destination operand
10050 to be first in the comparison - this helps reload to avoid extra
10052 if (!sse_comparison_operator (operands[1], VOIDmode)
10053 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
10055 rtx tmp = ix86_compare_op0;
10056 ix86_compare_op0 = ix86_compare_op1;
10057 ix86_compare_op1 = tmp;
10058 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
10059 VOIDmode, ix86_compare_op0,
10062 /* Similarly try to manage result to be first operand of conditional
10063 move. We also don't support the NE comparison on SSE, so try to
10065 if ((rtx_equal_p (operands[0], operands[3])
10066 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
10067 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
10069 rtx tmp = operands[2];
10070 operands[2] = operands[3];
10072 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10073 (GET_CODE (operands[1])),
10074 VOIDmode, ix86_compare_op0,
10077 if (GET_MODE (operands[0]) == SFmode)
10078 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
10079 operands[2], operands[3],
10080 ix86_compare_op0, ix86_compare_op1));
10082 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
10083 operands[2], operands[3],
10084 ix86_compare_op0, ix86_compare_op1));
10088 /* The floating point conditional move instructions don't directly
10089 support conditions resulting from a signed integer comparison. */
10091 code = GET_CODE (operands[1]);
10092 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10094 /* The floating point conditional move instructions don't directly
10095 support signed integer comparisons. */
10097 if (!fcmov_comparison_operator (compare_op, VOIDmode))
10099 if (second_test != NULL || bypass_test != NULL)
10101 tmp = gen_reg_rtx (QImode);
10102 ix86_expand_setcc (code, tmp);
10104 ix86_compare_op0 = tmp;
10105 ix86_compare_op1 = const0_rtx;
10106 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10108 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10110 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10111 emit_move_insn (tmp, operands[3]);
10114 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10116 tmp = gen_reg_rtx (GET_MODE (operands[0]));
10117 emit_move_insn (tmp, operands[2]);
10121 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10122 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10127 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10128 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10133 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10134 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10142 /* Expand conditional increment or decrement using adb/sbb instructions.
10143 The default case using setcc followed by the conditional move can be
10144 done by generic code. */
10146 ix86_expand_int_addcc (operands)
10149 enum rtx_code code = GET_CODE (operands[1]);
10151 rtx val = const0_rtx;
10152 bool fpcmp = false;
10154 enum machine_mode mode = GET_MODE (operands[0]);
10156 if (operands[3] != const1_rtx
10157 && operands[3] != constm1_rtx)
10159 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10160 ix86_compare_op1, &compare_op))
10162 code = GET_CODE (compare_op);
10164 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10165 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10168 code = ix86_fp_compare_code_to_integer (code);
10175 PUT_CODE (compare_op,
10176 reverse_condition_maybe_unordered
10177 (GET_CODE (compare_op)));
10179 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10181 PUT_MODE (compare_op, mode);
10183 /* Construct either adc or sbb insn. */
10184 if ((code == LTU) == (operands[3] == constm1_rtx))
10186 switch (GET_MODE (operands[0]))
10189 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10192 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10195 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10198 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10206 switch (GET_MODE (operands[0]))
10209 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10212 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10215 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10218 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10224 return 1; /* DONE */
10228 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
10229 works for floating pointer parameters and nonoffsetable memories.
10230 For pushes, it returns just stack offsets; the values will be saved
10231 in the right order. Maximally three parts are generated. */
10234 ix86_split_to_parts (operand, parts, mode)
10237 enum machine_mode mode;
10242 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
10244 size = (GET_MODE_SIZE (mode) + 4) / 8;
10246 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10248 if (size < 2 || size > 3)
10251 /* Optimize constant pool reference to immediates. This is used by fp
10252 moves, that force all constants to memory to allow combining. */
10253 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10255 rtx tmp = maybe_get_pool_constant (operand);
10260 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10262 /* The only non-offsetable memories we handle are pushes. */
10263 if (! push_operand (operand, VOIDmode))
10266 operand = copy_rtx (operand);
10267 PUT_MODE (operand, Pmode);
10268 parts[0] = parts[1] = parts[2] = operand;
10270 else if (!TARGET_64BIT)
10272 if (mode == DImode)
10273 split_di (&operand, 1, &parts[0], &parts[1]);
10276 if (REG_P (operand))
10278 if (!reload_completed)
10280 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10281 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10283 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10285 else if (offsettable_memref_p (operand))
10287 operand = adjust_address (operand, SImode, 0);
10288 parts[0] = operand;
10289 parts[1] = adjust_address (operand, SImode, 4);
10291 parts[2] = adjust_address (operand, SImode, 8);
10293 else if (GET_CODE (operand) == CONST_DOUBLE)
10298 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10303 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10304 parts[2] = gen_int_mode (l[2], SImode);
10307 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10312 parts[1] = gen_int_mode (l[1], SImode);
10313 parts[0] = gen_int_mode (l[0], SImode);
10321 if (mode == TImode)
10322 split_ti (&operand, 1, &parts[0], &parts[1]);
10323 if (mode == XFmode || mode == TFmode)
10325 if (REG_P (operand))
10327 if (!reload_completed)
10329 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10330 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10332 else if (offsettable_memref_p (operand))
10334 operand = adjust_address (operand, DImode, 0);
10335 parts[0] = operand;
10336 parts[1] = adjust_address (operand, SImode, 8);
10338 else if (GET_CODE (operand) == CONST_DOUBLE)
10343 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10344 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10345 /* Do not use shift by 32 to avoid warning on 32bit systems. */
10346 if (HOST_BITS_PER_WIDE_INT >= 64)
10349 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10350 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10353 parts[0] = immed_double_const (l[0], l[1], DImode);
10354 parts[1] = gen_int_mode (l[2], SImode);
10364 /* Emit insns to perform a move or push of DI, DF, and XF values.
10365 Return false when normal moves are needed; true when all required
10366 insns have been emitted. Operands 2-4 contain the input values
10367 int the correct order; operands 5-7 contain the output values. */
10370 ix86_split_long_move (operands)
10376 int collisions = 0;
10377 enum machine_mode mode = GET_MODE (operands[0]);
10379 /* The DFmode expanders may ask us to move double.
10380 For 64bit target this is single move. By hiding the fact
10381 here we simplify i386.md splitters. */
10382 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10384 /* Optimize constant pool reference to immediates. This is used by
10385 fp moves, that force all constants to memory to allow combining. */
10387 if (GET_CODE (operands[1]) == MEM
10388 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10389 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10390 operands[1] = get_pool_constant (XEXP (operands[1], 0));
10391 if (push_operand (operands[0], VOIDmode))
10393 operands[0] = copy_rtx (operands[0]);
10394 PUT_MODE (operands[0], Pmode);
10397 operands[0] = gen_lowpart (DImode, operands[0]);
10398 operands[1] = gen_lowpart (DImode, operands[1]);
10399 emit_move_insn (operands[0], operands[1]);
10403 /* The only non-offsettable memory we handle is push. */
10404 if (push_operand (operands[0], VOIDmode))
10406 else if (GET_CODE (operands[0]) == MEM
10407 && ! offsettable_memref_p (operands[0]))
10410 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10411 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10413 /* When emitting push, take care for source operands on the stack. */
10414 if (push && GET_CODE (operands[1]) == MEM
10415 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10418 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10419 XEXP (part[1][2], 0));
10420 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10421 XEXP (part[1][1], 0));
10424 /* We need to do copy in the right order in case an address register
10425 of the source overlaps the destination. */
10426 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10428 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10430 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10433 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10436 /* Collision in the middle part can be handled by reordering. */
10437 if (collisions == 1 && nparts == 3
10438 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10441 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10442 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10445 /* If there are more collisions, we can't handle it by reordering.
10446 Do an lea to the last part and use only one colliding move. */
10447 else if (collisions > 1)
10450 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
10451 XEXP (part[1][0], 0)));
10452 part[1][0] = change_address (part[1][0],
10453 TARGET_64BIT ? DImode : SImode,
10454 part[0][nparts - 1]);
10455 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
10457 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
10467 /* We use only first 12 bytes of TFmode value, but for pushing we
10468 are required to adjust stack as if we were pushing real 16byte
10470 if (mode == TFmode && !TARGET_64BIT)
10471 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
10473 emit_move_insn (part[0][2], part[1][2]);
10478 /* In 64bit mode we don't have 32bit push available. In case this is
10479 register, it is OK - we will just use larger counterpart. We also
10480 retype memory - these comes from attempt to avoid REX prefix on
10481 moving of second half of TFmode value. */
10482 if (GET_MODE (part[1][1]) == SImode)
10484 if (GET_CODE (part[1][1]) == MEM)
10485 part[1][1] = adjust_address (part[1][1], DImode, 0);
10486 else if (REG_P (part[1][1]))
10487 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10490 if (GET_MODE (part[1][0]) == SImode)
10491 part[1][0] = part[1][1];
10494 emit_move_insn (part[0][1], part[1][1]);
10495 emit_move_insn (part[0][0], part[1][0]);
10499 /* Choose correct order to not overwrite the source before it is copied. */
10500 if ((REG_P (part[0][0])
10501 && REG_P (part[1][1])
10502 && (REGNO (part[0][0]) == REGNO (part[1][1])
10504 && REGNO (part[0][0]) == REGNO (part[1][2]))))
10506 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10510 operands[2] = part[0][2];
10511 operands[3] = part[0][1];
10512 operands[4] = part[0][0];
10513 operands[5] = part[1][2];
10514 operands[6] = part[1][1];
10515 operands[7] = part[1][0];
10519 operands[2] = part[0][1];
10520 operands[3] = part[0][0];
10521 operands[5] = part[1][1];
10522 operands[6] = part[1][0];
10529 operands[2] = part[0][0];
10530 operands[3] = part[0][1];
10531 operands[4] = part[0][2];
10532 operands[5] = part[1][0];
10533 operands[6] = part[1][1];
10534 operands[7] = part[1][2];
10538 operands[2] = part[0][0];
10539 operands[3] = part[0][1];
10540 operands[5] = part[1][0];
10541 operands[6] = part[1][1];
10544 emit_move_insn (operands[2], operands[5]);
10545 emit_move_insn (operands[3], operands[6]);
10547 emit_move_insn (operands[4], operands[7]);
10553 ix86_split_ashldi (operands, scratch)
10554 rtx *operands, scratch;
10556 rtx low[2], high[2];
10559 if (GET_CODE (operands[2]) == CONST_INT)
10561 split_di (operands, 2, low, high);
10562 count = INTVAL (operands[2]) & 63;
10566 emit_move_insn (high[0], low[1]);
10567 emit_move_insn (low[0], const0_rtx);
10570 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10574 if (!rtx_equal_p (operands[0], operands[1]))
10575 emit_move_insn (operands[0], operands[1]);
10576 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10577 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10582 if (!rtx_equal_p (operands[0], operands[1]))
10583 emit_move_insn (operands[0], operands[1]);
10585 split_di (operands, 1, low, high);
10587 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10588 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10590 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10592 if (! no_new_pseudos)
10593 scratch = force_reg (SImode, const0_rtx);
10595 emit_move_insn (scratch, const0_rtx);
10597 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10601 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10606 ix86_split_ashrdi (operands, scratch)
10607 rtx *operands, scratch;
10609 rtx low[2], high[2];
10612 if (GET_CODE (operands[2]) == CONST_INT)
10614 split_di (operands, 2, low, high);
10615 count = INTVAL (operands[2]) & 63;
10619 emit_move_insn (low[0], high[1]);
10621 if (! reload_completed)
10622 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10625 emit_move_insn (high[0], low[0]);
10626 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10630 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10634 if (!rtx_equal_p (operands[0], operands[1]))
10635 emit_move_insn (operands[0], operands[1]);
10636 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10637 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10642 if (!rtx_equal_p (operands[0], operands[1]))
10643 emit_move_insn (operands[0], operands[1]);
10645 split_di (operands, 1, low, high);
10647 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10648 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10650 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10652 if (! no_new_pseudos)
10653 scratch = gen_reg_rtx (SImode);
10654 emit_move_insn (scratch, high[0]);
10655 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10656 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10660 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10665 ix86_split_lshrdi (operands, scratch)
10666 rtx *operands, scratch;
10668 rtx low[2], high[2];
10671 if (GET_CODE (operands[2]) == CONST_INT)
10673 split_di (operands, 2, low, high);
10674 count = INTVAL (operands[2]) & 63;
10678 emit_move_insn (low[0], high[1]);
10679 emit_move_insn (high[0], const0_rtx);
10682 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
10686 if (!rtx_equal_p (operands[0], operands[1]))
10687 emit_move_insn (operands[0], operands[1]);
10688 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10689 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
10694 if (!rtx_equal_p (operands[0], operands[1]))
10695 emit_move_insn (operands[0], operands[1]);
10697 split_di (operands, 1, low, high);
10699 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10700 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
10702 /* Heh. By reversing the arguments, we can reuse this pattern. */
10703 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10705 if (! no_new_pseudos)
10706 scratch = force_reg (SImode, const0_rtx);
10708 emit_move_insn (scratch, const0_rtx);
10710 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10714 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
10718 /* Helper function for the string operations below. Dest VARIABLE whether
10719 it is aligned to VALUE bytes. If true, jump to the label. */
10721 ix86_expand_aligntest (variable, value)
10725 rtx label = gen_label_rtx ();
10726 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
10727 if (GET_MODE (variable) == DImode)
10728 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10730 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10731 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
10736 /* Adjust COUNTER by the VALUE. */
10738 ix86_adjust_counter (countreg, value)
10740 HOST_WIDE_INT value;
10742 if (GET_MODE (countreg) == DImode)
10743 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10745 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10748 /* Zero extend possibly SImode EXP to Pmode register. */
10750 ix86_zero_extend_to_Pmode (exp)
10754 if (GET_MODE (exp) == VOIDmode)
10755 return force_reg (Pmode, exp);
10756 if (GET_MODE (exp) == Pmode)
10757 return copy_to_mode_reg (Pmode, exp);
10758 r = gen_reg_rtx (Pmode);
10759 emit_insn (gen_zero_extendsidi2 (r, exp));
10763 /* Expand string move (memcpy) operation. Use i386 string operations when
10764 profitable. expand_clrstr contains similar code. */
10766 ix86_expand_movstr (dst, src, count_exp, align_exp)
10767 rtx dst, src, count_exp, align_exp;
10769 rtx srcreg, destreg, countreg;
10770 enum machine_mode counter_mode;
10771 HOST_WIDE_INT align = 0;
10772 unsigned HOST_WIDE_INT count = 0;
10776 if (GET_CODE (align_exp) == CONST_INT)
10777 align = INTVAL (align_exp);
10779 /* This simple hack avoids all inlining code and simplifies code below. */
10780 if (!TARGET_ALIGN_STRINGOPS)
10783 if (GET_CODE (count_exp) == CONST_INT)
10785 count = INTVAL (count_exp);
10786 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
10790 /* Figure out proper mode for counter. For 32bits it is always SImode,
10791 for 64bits use SImode when possible, otherwise DImode.
10792 Set count to number of bytes copied when known at compile time. */
10793 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10794 || x86_64_zero_extended_value (count_exp))
10795 counter_mode = SImode;
10797 counter_mode = DImode;
10801 if (counter_mode != SImode && counter_mode != DImode)
10804 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10805 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10807 emit_insn (gen_cld ());
10809 /* When optimizing for size emit simple rep ; movsb instruction for
10810 counts not divisible by 4. */
10812 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10814 countreg = ix86_zero_extend_to_Pmode (count_exp);
10816 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
10817 destreg, srcreg, countreg));
10819 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
10820 destreg, srcreg, countreg));
10823 /* For constant aligned (or small unaligned) copies use rep movsl
10824 followed by code copying the rest. For PentiumPro ensure 8 byte
10825 alignment to allow rep movsl acceleration. */
10827 else if (count != 0
10829 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
10830 || optimize_size || count < (unsigned int) 64))
10832 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10833 if (count & ~(size - 1))
10835 countreg = copy_to_mode_reg (counter_mode,
10836 GEN_INT ((count >> (size == 4 ? 2 : 3))
10837 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10838 countreg = ix86_zero_extend_to_Pmode (countreg);
10842 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
10843 destreg, srcreg, countreg));
10845 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
10846 destreg, srcreg, countreg));
10849 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
10850 destreg, srcreg, countreg));
10852 if (size == 8 && (count & 0x04))
10853 emit_insn (gen_strmovsi (destreg, srcreg));
10855 emit_insn (gen_strmovhi (destreg, srcreg));
10857 emit_insn (gen_strmovqi (destreg, srcreg));
10859 /* The generic code based on the glibc implementation:
10860 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10861 allowing accelerated copying there)
10862 - copy the data using rep movsl
10863 - copy the rest. */
10868 int desired_alignment = (TARGET_PENTIUMPRO
10869 && (count == 0 || count >= (unsigned int) 260)
10870 ? 8 : UNITS_PER_WORD);
10872 /* In case we don't know anything about the alignment, default to
10873 library version, since it is usually equally fast and result in
10876 Also emit call when we know that the count is large and call overhead
10877 will not be important. */
10878 if (!TARGET_INLINE_ALL_STRINGOPS
10879 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
10885 if (TARGET_SINGLE_STRINGOP)
10886 emit_insn (gen_cld ());
10888 countreg2 = gen_reg_rtx (Pmode);
10889 countreg = copy_to_mode_reg (counter_mode, count_exp);
10891 /* We don't use loops to align destination and to copy parts smaller
10892 than 4 bytes, because gcc is able to optimize such code better (in
10893 the case the destination or the count really is aligned, gcc is often
10894 able to predict the branches) and also it is friendlier to the
10895 hardware branch prediction.
10897 Using loops is beneficial for generic case, because we can
10898 handle small counts using the loops. Many CPUs (such as Athlon)
10899 have large REP prefix setup costs.
10901 This is quite costly. Maybe we can revisit this decision later or
10902 add some customizability to this code. */
10904 if (count == 0 && align < desired_alignment)
10906 label = gen_label_rtx ();
10907 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
10908 LEU, 0, counter_mode, 1, label);
10912 rtx label = ix86_expand_aligntest (destreg, 1);
10913 emit_insn (gen_strmovqi (destreg, srcreg));
10914 ix86_adjust_counter (countreg, 1);
10915 emit_label (label);
10916 LABEL_NUSES (label) = 1;
10920 rtx label = ix86_expand_aligntest (destreg, 2);
10921 emit_insn (gen_strmovhi (destreg, srcreg));
10922 ix86_adjust_counter (countreg, 2);
10923 emit_label (label);
10924 LABEL_NUSES (label) = 1;
10926 if (align <= 4 && desired_alignment > 4)
10928 rtx label = ix86_expand_aligntest (destreg, 4);
10929 emit_insn (gen_strmovsi (destreg, srcreg));
10930 ix86_adjust_counter (countreg, 4);
10931 emit_label (label);
10932 LABEL_NUSES (label) = 1;
10935 if (label && desired_alignment > 4 && !TARGET_64BIT)
10937 emit_label (label);
10938 LABEL_NUSES (label) = 1;
10941 if (!TARGET_SINGLE_STRINGOP)
10942 emit_insn (gen_cld ());
10945 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10947 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
10948 destreg, srcreg, countreg2));
10952 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10953 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
10954 destreg, srcreg, countreg2));
10959 emit_label (label);
10960 LABEL_NUSES (label) = 1;
10962 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10963 emit_insn (gen_strmovsi (destreg, srcreg));
10964 if ((align <= 4 || count == 0) && TARGET_64BIT)
10966 rtx label = ix86_expand_aligntest (countreg, 4);
10967 emit_insn (gen_strmovsi (destreg, srcreg));
10968 emit_label (label);
10969 LABEL_NUSES (label) = 1;
10971 if (align > 2 && count != 0 && (count & 2))
10972 emit_insn (gen_strmovhi (destreg, srcreg));
10973 if (align <= 2 || count == 0)
10975 rtx label = ix86_expand_aligntest (countreg, 2);
10976 emit_insn (gen_strmovhi (destreg, srcreg));
10977 emit_label (label);
10978 LABEL_NUSES (label) = 1;
10980 if (align > 1 && count != 0 && (count & 1))
10981 emit_insn (gen_strmovqi (destreg, srcreg));
10982 if (align <= 1 || count == 0)
10984 rtx label = ix86_expand_aligntest (countreg, 1);
10985 emit_insn (gen_strmovqi (destreg, srcreg));
10986 emit_label (label);
10987 LABEL_NUSES (label) = 1;
10991 insns = get_insns ();
10994 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
10999 /* Expand string clear operation (bzero). Use i386 string operations when
11000 profitable. expand_movstr contains similar code. */
11002 ix86_expand_clrstr (src, count_exp, align_exp)
11003 rtx src, count_exp, align_exp;
11005 rtx destreg, zeroreg, countreg;
11006 enum machine_mode counter_mode;
11007 HOST_WIDE_INT align = 0;
11008 unsigned HOST_WIDE_INT count = 0;
11010 if (GET_CODE (align_exp) == CONST_INT)
11011 align = INTVAL (align_exp);
11013 /* This simple hack avoids all inlining code and simplifies code below. */
11014 if (!TARGET_ALIGN_STRINGOPS)
11017 if (GET_CODE (count_exp) == CONST_INT)
11019 count = INTVAL (count_exp);
11020 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11023 /* Figure out proper mode for counter. For 32bits it is always SImode,
11024 for 64bits use SImode when possible, otherwise DImode.
11025 Set count to number of bytes copied when known at compile time. */
11026 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11027 || x86_64_zero_extended_value (count_exp))
11028 counter_mode = SImode;
11030 counter_mode = DImode;
11032 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11034 emit_insn (gen_cld ());
11036 /* When optimizing for size emit simple rep ; movsb instruction for
11037 counts not divisible by 4. */
11039 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11041 countreg = ix86_zero_extend_to_Pmode (count_exp);
11042 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11044 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
11045 destreg, countreg));
11047 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
11048 destreg, countreg));
11050 else if (count != 0
11052 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11053 || optimize_size || count < (unsigned int) 64))
11055 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11056 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11057 if (count & ~(size - 1))
11059 countreg = copy_to_mode_reg (counter_mode,
11060 GEN_INT ((count >> (size == 4 ? 2 : 3))
11061 & (TARGET_64BIT ? -1 : 0x3fffffff)));
11062 countreg = ix86_zero_extend_to_Pmode (countreg);
11066 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
11067 destreg, countreg));
11069 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
11070 destreg, countreg));
11073 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
11074 destreg, countreg));
11076 if (size == 8 && (count & 0x04))
11077 emit_insn (gen_strsetsi (destreg,
11078 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11080 emit_insn (gen_strsethi (destreg,
11081 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11083 emit_insn (gen_strsetqi (destreg,
11084 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11090 /* Compute desired alignment of the string operation. */
11091 int desired_alignment = (TARGET_PENTIUMPRO
11092 && (count == 0 || count >= (unsigned int) 260)
11093 ? 8 : UNITS_PER_WORD);
11095 /* In case we don't know anything about the alignment, default to
11096 library version, since it is usually equally fast and result in
11099 Also emit call when we know that the count is large and call overhead
11100 will not be important. */
11101 if (!TARGET_INLINE_ALL_STRINGOPS
11102 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11105 if (TARGET_SINGLE_STRINGOP)
11106 emit_insn (gen_cld ());
11108 countreg2 = gen_reg_rtx (Pmode);
11109 countreg = copy_to_mode_reg (counter_mode, count_exp);
11110 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11112 if (count == 0 && align < desired_alignment)
11114 label = gen_label_rtx ();
11115 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11116 LEU, 0, counter_mode, 1, label);
11120 rtx label = ix86_expand_aligntest (destreg, 1);
11121 emit_insn (gen_strsetqi (destreg,
11122 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11123 ix86_adjust_counter (countreg, 1);
11124 emit_label (label);
11125 LABEL_NUSES (label) = 1;
11129 rtx label = ix86_expand_aligntest (destreg, 2);
11130 emit_insn (gen_strsethi (destreg,
11131 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11132 ix86_adjust_counter (countreg, 2);
11133 emit_label (label);
11134 LABEL_NUSES (label) = 1;
11136 if (align <= 4 && desired_alignment > 4)
11138 rtx label = ix86_expand_aligntest (destreg, 4);
11139 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
11140 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11142 ix86_adjust_counter (countreg, 4);
11143 emit_label (label);
11144 LABEL_NUSES (label) = 1;
11147 if (label && desired_alignment > 4 && !TARGET_64BIT)
11149 emit_label (label);
11150 LABEL_NUSES (label) = 1;
11154 if (!TARGET_SINGLE_STRINGOP)
11155 emit_insn (gen_cld ());
11158 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11160 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
11161 destreg, countreg2));
11165 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
11166 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
11167 destreg, countreg2));
11171 emit_label (label);
11172 LABEL_NUSES (label) = 1;
11175 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11176 emit_insn (gen_strsetsi (destreg,
11177 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11178 if (TARGET_64BIT && (align <= 4 || count == 0))
11180 rtx label = ix86_expand_aligntest (countreg, 4);
11181 emit_insn (gen_strsetsi (destreg,
11182 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11183 emit_label (label);
11184 LABEL_NUSES (label) = 1;
11186 if (align > 2 && count != 0 && (count & 2))
11187 emit_insn (gen_strsethi (destreg,
11188 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11189 if (align <= 2 || count == 0)
11191 rtx label = ix86_expand_aligntest (countreg, 2);
11192 emit_insn (gen_strsethi (destreg,
11193 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11194 emit_label (label);
11195 LABEL_NUSES (label) = 1;
11197 if (align > 1 && count != 0 && (count & 1))
11198 emit_insn (gen_strsetqi (destreg,
11199 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11200 if (align <= 1 || count == 0)
11202 rtx label = ix86_expand_aligntest (countreg, 1);
11203 emit_insn (gen_strsetqi (destreg,
11204 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11205 emit_label (label);
11206 LABEL_NUSES (label) = 1;
11211 /* Expand strlen. */
11213 ix86_expand_strlen (out, src, eoschar, align)
11214 rtx out, src, eoschar, align;
11216 rtx addr, scratch1, scratch2, scratch3, scratch4;
11218 /* The generic case of strlen expander is long. Avoid it's
11219 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
11221 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11222 && !TARGET_INLINE_ALL_STRINGOPS
11224 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11227 addr = force_reg (Pmode, XEXP (src, 0));
11228 scratch1 = gen_reg_rtx (Pmode);
11230 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11233 /* Well it seems that some optimizer does not combine a call like
11234 foo(strlen(bar), strlen(bar));
11235 when the move and the subtraction is done here. It does calculate
11236 the length just once when these instructions are done inside of
11237 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
11238 often used and I use one fewer register for the lifetime of
11239 output_strlen_unroll() this is better. */
11241 emit_move_insn (out, addr);
11243 ix86_expand_strlensi_unroll_1 (out, align);
11245 /* strlensi_unroll_1 returns the address of the zero at the end of
11246 the string, like memchr(), so compute the length by subtracting
11247 the start address. */
11249 emit_insn (gen_subdi3 (out, out, addr));
11251 emit_insn (gen_subsi3 (out, out, addr));
11255 scratch2 = gen_reg_rtx (Pmode);
11256 scratch3 = gen_reg_rtx (Pmode);
11257 scratch4 = force_reg (Pmode, constm1_rtx);
11259 emit_move_insn (scratch3, addr);
11260 eoschar = force_reg (QImode, eoschar);
11262 emit_insn (gen_cld ());
11265 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
11266 align, scratch4, scratch3));
11267 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11268 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11272 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
11273 align, scratch4, scratch3));
11274 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11275 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11281 /* Expand the appropriate insns for doing strlen if not just doing
11284 out = result, initialized with the start address
11285 align_rtx = alignment of the address.
11286 scratch = scratch register, initialized with the startaddress when
11287 not aligned, otherwise undefined
11289 This is just the body. It needs the initialisations mentioned above and
11290 some address computing at the end. These things are done in i386.md. */
11293 ix86_expand_strlensi_unroll_1 (out, align_rtx)
11294 rtx out, align_rtx;
11298 rtx align_2_label = NULL_RTX;
11299 rtx align_3_label = NULL_RTX;
11300 rtx align_4_label = gen_label_rtx ();
11301 rtx end_0_label = gen_label_rtx ();
11303 rtx tmpreg = gen_reg_rtx (SImode);
11304 rtx scratch = gen_reg_rtx (SImode);
11308 if (GET_CODE (align_rtx) == CONST_INT)
11309 align = INTVAL (align_rtx);
11311 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
11313 /* Is there a known alignment and is it less than 4? */
11316 rtx scratch1 = gen_reg_rtx (Pmode);
11317 emit_move_insn (scratch1, out);
11318 /* Is there a known alignment and is it not 2? */
11321 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11322 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11324 /* Leave just the 3 lower bits. */
11325 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11326 NULL_RTX, 0, OPTAB_WIDEN);
11328 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11329 Pmode, 1, align_4_label);
11330 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
11331 Pmode, 1, align_2_label);
11332 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
11333 Pmode, 1, align_3_label);
11337 /* Since the alignment is 2, we have to check 2 or 0 bytes;
11338 check if is aligned to 4 - byte. */
11340 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
11341 NULL_RTX, 0, OPTAB_WIDEN);
11343 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11344 Pmode, 1, align_4_label);
11347 mem = gen_rtx_MEM (QImode, out);
11349 /* Now compare the bytes. */
11351 /* Compare the first n unaligned byte on a byte per byte basis. */
11352 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11353 QImode, 1, end_0_label);
11355 /* Increment the address. */
11357 emit_insn (gen_adddi3 (out, out, const1_rtx));
11359 emit_insn (gen_addsi3 (out, out, const1_rtx));
11361 /* Not needed with an alignment of 2 */
11364 emit_label (align_2_label);
11366 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11370 emit_insn (gen_adddi3 (out, out, const1_rtx));
11372 emit_insn (gen_addsi3 (out, out, const1_rtx));
11374 emit_label (align_3_label);
11377 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11381 emit_insn (gen_adddi3 (out, out, const1_rtx));
11383 emit_insn (gen_addsi3 (out, out, const1_rtx));
11386 /* Generate loop to check 4 bytes at a time. It is not a good idea to
11387 align this loop. It gives only huge programs, but does not help to
11389 emit_label (align_4_label);
11391 mem = gen_rtx_MEM (SImode, out);
11392 emit_move_insn (scratch, mem);
11394 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11396 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11398 /* This formula yields a nonzero result iff one of the bytes is zero.
11399 This saves three branches inside loop and many cycles. */
11401 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11402 emit_insn (gen_one_cmplsi2 (scratch, scratch));
11403 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11404 emit_insn (gen_andsi3 (tmpreg, tmpreg,
11405 gen_int_mode (0x80808080, SImode)));
11406 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11411 rtx reg = gen_reg_rtx (SImode);
11412 rtx reg2 = gen_reg_rtx (Pmode);
11413 emit_move_insn (reg, tmpreg);
11414 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11416 /* If zero is not in the first two bytes, move two bytes forward. */
11417 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11418 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11419 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11420 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11421 gen_rtx_IF_THEN_ELSE (SImode, tmp,
11424 /* Emit lea manually to avoid clobbering of flags. */
11425 emit_insn (gen_rtx_SET (SImode, reg2,
11426 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
11428 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11429 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11430 emit_insn (gen_rtx_SET (VOIDmode, out,
11431 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11438 rtx end_2_label = gen_label_rtx ();
11439 /* Is zero in the first two bytes? */
11441 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11442 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11443 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11444 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11445 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11447 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11448 JUMP_LABEL (tmp) = end_2_label;
11450 /* Not in the first two. Move two bytes forward. */
11451 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11453 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
11455 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
11457 emit_label (end_2_label);
11461 /* Avoid branch in fixing the byte. */
11462 tmpreg = gen_lowpart (QImode, tmpreg);
11463 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11464 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11466 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11468 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
11470 emit_label (end_0_label);
11474 ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop, sibcall)
11475 rtx retval, fnaddr, callarg1, callarg2, pop;
11478 rtx use = NULL, call;
11480 if (pop == const0_rtx)
11482 if (TARGET_64BIT && pop)
11486 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11487 fnaddr = machopic_indirect_call_target (fnaddr);
11489 /* Static functions and indirect calls don't need the pic register. */
11490 if (! TARGET_64BIT && flag_pic
11491 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11492 && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
11493 use_reg (&use, pic_offset_table_rtx);
11495 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11497 rtx al = gen_rtx_REG (QImode, 0);
11498 emit_move_insn (al, callarg2);
11499 use_reg (&use, al);
11501 #endif /* TARGET_MACHO */
11503 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11505 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11506 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11508 if (sibcall && TARGET_64BIT
11509 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11512 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11513 fnaddr = gen_rtx_REG (Pmode, 40);
11514 emit_move_insn (fnaddr, addr);
11515 fnaddr = gen_rtx_MEM (QImode, fnaddr);
11518 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11520 call = gen_rtx_SET (VOIDmode, retval, call);
11523 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11524 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11525 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11528 call = emit_call_insn (call);
11530 CALL_INSN_FUNCTION_USAGE (call) = use;
11534 /* Clear stack slot assignments remembered from previous functions.
11535 This is called from INIT_EXPANDERS once before RTL is emitted for each
11538 static struct machine_function *
11539 ix86_init_machine_status ()
11541 return ggc_alloc_cleared (sizeof (struct machine_function));
11544 /* Return a MEM corresponding to a stack slot with mode MODE.
11545 Allocate a new slot if necessary.
11547 The RTL for a function can have several slots available: N is
11548 which slot to use. */
11551 assign_386_stack_local (mode, n)
11552 enum machine_mode mode;
11555 if (n < 0 || n >= MAX_386_STACK_LOCALS)
11558 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
11559 ix86_stack_locals[(int) mode][n]
11560 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11562 return ix86_stack_locals[(int) mode][n];
11565 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11567 static GTY(()) rtx ix86_tls_symbol;
11569 ix86_tls_get_addr ()
11572 if (!ix86_tls_symbol)
11574 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11575 (TARGET_GNU_TLS && !TARGET_64BIT)
11576 ? "___tls_get_addr"
11577 : "__tls_get_addr");
11580 return ix86_tls_symbol;
11583 /* Calculate the length of the memory address in the instruction
11584 encoding. Does not include the one-byte modrm, opcode, or prefix. */
11587 memory_address_length (addr)
11590 struct ix86_address parts;
11591 rtx base, index, disp;
11594 if (GET_CODE (addr) == PRE_DEC
11595 || GET_CODE (addr) == POST_INC
11596 || GET_CODE (addr) == PRE_MODIFY
11597 || GET_CODE (addr) == POST_MODIFY)
11600 if (! ix86_decompose_address (addr, &parts))
11604 index = parts.index;
11608 /* Register Indirect. */
11609 if (base && !index && !disp)
11611 /* Special cases: ebp and esp need the two-byte modrm form. */
11612 if (addr == stack_pointer_rtx
11613 || addr == arg_pointer_rtx
11614 || addr == frame_pointer_rtx
11615 || addr == hard_frame_pointer_rtx)
11619 /* Direct Addressing. */
11620 else if (disp && !base && !index)
11625 /* Find the length of the displacement constant. */
11628 if (GET_CODE (disp) == CONST_INT
11629 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
11635 /* An index requires the two-byte modrm form. */
11643 /* Compute default value for "length_immediate" attribute. When SHORTFORM
11644 is set, expect that insn have 8bit immediate alternative. */
11646 ix86_attr_length_immediate_default (insn, shortform)
11652 extract_insn_cached (insn);
11653 for (i = recog_data.n_operands - 1; i >= 0; --i)
11654 if (CONSTANT_P (recog_data.operand[i]))
11659 && GET_CODE (recog_data.operand[i]) == CONST_INT
11660 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
11664 switch (get_attr_mode (insn))
11675 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
11680 fatal_insn ("unknown insn mode", insn);
11686 /* Compute default value for "length_address" attribute. */
11688 ix86_attr_length_address_default (insn)
11692 extract_insn_cached (insn);
11693 for (i = recog_data.n_operands - 1; i >= 0; --i)
11694 if (GET_CODE (recog_data.operand[i]) == MEM)
11696 return memory_address_length (XEXP (recog_data.operand[i], 0));
11702 /* Return the maximum number of instructions a cpu can issue. */
11709 case PROCESSOR_PENTIUM:
11713 case PROCESSOR_PENTIUMPRO:
11714 case PROCESSOR_PENTIUM4:
11715 case PROCESSOR_ATHLON:
11724 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
11725 by DEP_INSN and nothing set by DEP_INSN. */
11728 ix86_flags_dependant (insn, dep_insn, insn_type)
11729 rtx insn, dep_insn;
11730 enum attr_type insn_type;
11734 /* Simplify the test for uninteresting insns. */
11735 if (insn_type != TYPE_SETCC
11736 && insn_type != TYPE_ICMOV
11737 && insn_type != TYPE_FCMOV
11738 && insn_type != TYPE_IBR)
11741 if ((set = single_set (dep_insn)) != 0)
11743 set = SET_DEST (set);
11746 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
11747 && XVECLEN (PATTERN (dep_insn), 0) == 2
11748 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
11749 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
11751 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11752 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
11757 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
11760 /* This test is true if the dependent insn reads the flags but
11761 not any other potentially set register. */
11762 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11765 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11771 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11772 address with operands set by DEP_INSN. */
11775 ix86_agi_dependant (insn, dep_insn, insn_type)
11776 rtx insn, dep_insn;
11777 enum attr_type insn_type;
11781 if (insn_type == TYPE_LEA
11784 addr = PATTERN (insn);
11785 if (GET_CODE (addr) == SET)
11787 else if (GET_CODE (addr) == PARALLEL
11788 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11789 addr = XVECEXP (addr, 0, 0);
11792 addr = SET_SRC (addr);
11797 extract_insn_cached (insn);
11798 for (i = recog_data.n_operands - 1; i >= 0; --i)
11799 if (GET_CODE (recog_data.operand[i]) == MEM)
11801 addr = XEXP (recog_data.operand[i], 0);
11808 return modified_in_p (addr, dep_insn);
11812 ix86_adjust_cost (insn, link, dep_insn, cost)
11813 rtx insn, link, dep_insn;
11816 enum attr_type insn_type, dep_insn_type;
11817 enum attr_memory memory, dep_memory;
11819 int dep_insn_code_number;
11821 /* Anti and output dependencies have zero cost on all CPUs. */
11822 if (REG_NOTE_KIND (link) != 0)
11825 dep_insn_code_number = recog_memoized (dep_insn);
11827 /* If we can't recognize the insns, we can't really do anything. */
11828 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
11831 insn_type = get_attr_type (insn);
11832 dep_insn_type = get_attr_type (dep_insn);
11836 case PROCESSOR_PENTIUM:
11837 /* Address Generation Interlock adds a cycle of latency. */
11838 if (ix86_agi_dependant (insn, dep_insn, insn_type))
11841 /* ??? Compares pair with jump/setcc. */
11842 if (ix86_flags_dependant (insn, dep_insn, insn_type))
11845 /* Floating point stores require value to be ready one cycle earlier. */
11846 if (insn_type == TYPE_FMOV
11847 && get_attr_memory (insn) == MEMORY_STORE
11848 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11852 case PROCESSOR_PENTIUMPRO:
11853 memory = get_attr_memory (insn);
11854 dep_memory = get_attr_memory (dep_insn);
11856 /* Since we can't represent delayed latencies of load+operation,
11857 increase the cost here for non-imov insns. */
11858 if (dep_insn_type != TYPE_IMOV
11859 && dep_insn_type != TYPE_FMOV
11860 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
11863 /* INT->FP conversion is expensive. */
11864 if (get_attr_fp_int_src (dep_insn))
11867 /* There is one cycle extra latency between an FP op and a store. */
11868 if (insn_type == TYPE_FMOV
11869 && (set = single_set (dep_insn)) != NULL_RTX
11870 && (set2 = single_set (insn)) != NULL_RTX
11871 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11872 && GET_CODE (SET_DEST (set2)) == MEM)
11875 /* Show ability of reorder buffer to hide latency of load by executing
11876 in parallel with previous instruction in case
11877 previous instruction is not needed to compute the address. */
11878 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11879 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11881 /* Claim moves to take one cycle, as core can issue one load
11882 at time and the next load can start cycle later. */
11883 if (dep_insn_type == TYPE_IMOV
11884 || dep_insn_type == TYPE_FMOV)
11892 memory = get_attr_memory (insn);
11893 dep_memory = get_attr_memory (dep_insn);
11894 /* The esp dependency is resolved before the instruction is really
11896 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11897 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11900 /* Since we can't represent delayed latencies of load+operation,
11901 increase the cost here for non-imov insns. */
11902 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
11903 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
11905 /* INT->FP conversion is expensive. */
11906 if (get_attr_fp_int_src (dep_insn))
11909 /* Show ability of reorder buffer to hide latency of load by executing
11910 in parallel with previous instruction in case
11911 previous instruction is not needed to compute the address. */
11912 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11913 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11915 /* Claim moves to take one cycle, as core can issue one load
11916 at time and the next load can start cycle later. */
11917 if (dep_insn_type == TYPE_IMOV
11918 || dep_insn_type == TYPE_FMOV)
11927 case PROCESSOR_ATHLON:
11929 memory = get_attr_memory (insn);
11930 dep_memory = get_attr_memory (dep_insn);
11932 /* Show ability of reorder buffer to hide latency of load by executing
11933 in parallel with previous instruction in case
11934 previous instruction is not needed to compute the address. */
11935 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11936 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11938 /* Claim moves to take one cycle, as core can issue one load
11939 at time and the next load can start cycle later. */
11940 if (dep_insn_type == TYPE_IMOV
11941 || dep_insn_type == TYPE_FMOV)
11943 else if (cost >= 3)
11958 struct ppro_sched_data
11961 int issued_this_cycle;
11965 static enum attr_ppro_uops
11966 ix86_safe_ppro_uops (insn)
11969 if (recog_memoized (insn) >= 0)
11970 return get_attr_ppro_uops (insn);
11972 return PPRO_UOPS_MANY;
11976 ix86_dump_ppro_packet (dump)
11979 if (ix86_sched_data.ppro.decode[0])
11981 fprintf (dump, "PPRO packet: %d",
11982 INSN_UID (ix86_sched_data.ppro.decode[0]));
11983 if (ix86_sched_data.ppro.decode[1])
11984 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
11985 if (ix86_sched_data.ppro.decode[2])
11986 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
11987 fputc ('\n', dump);
11991 /* We're beginning a new block. Initialize data structures as necessary. */
11994 ix86_sched_init (dump, sched_verbose, veclen)
11995 FILE *dump ATTRIBUTE_UNUSED;
11996 int sched_verbose ATTRIBUTE_UNUSED;
11997 int veclen ATTRIBUTE_UNUSED;
11999 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
12002 /* Shift INSN to SLOT, and shift everything else down. */
12005 ix86_reorder_insn (insnp, slot)
12012 insnp[0] = insnp[1];
12013 while (++insnp != slot);
12019 ix86_sched_reorder_ppro (ready, e_ready)
12024 enum attr_ppro_uops cur_uops;
12025 int issued_this_cycle;
12029 /* At this point .ppro.decode contains the state of the three
12030 decoders from last "cycle". That is, those insns that were
12031 actually independent. But here we're scheduling for the
12032 decoder, and we may find things that are decodable in the
12035 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
12036 issued_this_cycle = 0;
12039 cur_uops = ix86_safe_ppro_uops (*insnp);
12041 /* If the decoders are empty, and we've a complex insn at the
12042 head of the priority queue, let it issue without complaint. */
12043 if (decode[0] == NULL)
12045 if (cur_uops == PPRO_UOPS_MANY)
12047 decode[0] = *insnp;
12051 /* Otherwise, search for a 2-4 uop unsn to issue. */
12052 while (cur_uops != PPRO_UOPS_FEW)
12054 if (insnp == ready)
12056 cur_uops = ix86_safe_ppro_uops (*--insnp);
12059 /* If so, move it to the head of the line. */
12060 if (cur_uops == PPRO_UOPS_FEW)
12061 ix86_reorder_insn (insnp, e_ready);
12063 /* Issue the head of the queue. */
12064 issued_this_cycle = 1;
12065 decode[0] = *e_ready--;
12068 /* Look for simple insns to fill in the other two slots. */
12069 for (i = 1; i < 3; ++i)
12070 if (decode[i] == NULL)
12072 if (ready > e_ready)
12076 cur_uops = ix86_safe_ppro_uops (*insnp);
12077 while (cur_uops != PPRO_UOPS_ONE)
12079 if (insnp == ready)
12081 cur_uops = ix86_safe_ppro_uops (*--insnp);
12084 /* Found one. Move it to the head of the queue and issue it. */
12085 if (cur_uops == PPRO_UOPS_ONE)
12087 ix86_reorder_insn (insnp, e_ready);
12088 decode[i] = *e_ready--;
12089 issued_this_cycle++;
12093 /* ??? Didn't find one. Ideally, here we would do a lazy split
12094 of 2-uop insns, issue one and queue the other. */
12098 if (issued_this_cycle == 0)
12099 issued_this_cycle = 1;
12100 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
12103 /* We are about to being issuing insns for this clock cycle.
12104 Override the default sort algorithm to better slot instructions. */
12106 ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
12107 FILE *dump ATTRIBUTE_UNUSED;
12108 int sched_verbose ATTRIBUTE_UNUSED;
12111 int clock_var ATTRIBUTE_UNUSED;
12113 int n_ready = *n_readyp;
12114 rtx *e_ready = ready + n_ready - 1;
12116 /* Make sure to go ahead and initialize key items in
12117 ix86_sched_data if we are not going to bother trying to
12118 reorder the ready queue. */
12121 ix86_sched_data.ppro.issued_this_cycle = 1;
12130 case PROCESSOR_PENTIUMPRO:
12131 ix86_sched_reorder_ppro (ready, e_ready);
12136 return ix86_issue_rate ();
12139 /* We are about to issue INSN. Return the number of insns left on the
12140 ready queue that can be issued this cycle. */
12143 ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
12147 int can_issue_more;
12153 return can_issue_more - 1;
12155 case PROCESSOR_PENTIUMPRO:
12157 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
12159 if (uops == PPRO_UOPS_MANY)
12162 ix86_dump_ppro_packet (dump);
12163 ix86_sched_data.ppro.decode[0] = insn;
12164 ix86_sched_data.ppro.decode[1] = NULL;
12165 ix86_sched_data.ppro.decode[2] = NULL;
12167 ix86_dump_ppro_packet (dump);
12168 ix86_sched_data.ppro.decode[0] = NULL;
12170 else if (uops == PPRO_UOPS_FEW)
12173 ix86_dump_ppro_packet (dump);
12174 ix86_sched_data.ppro.decode[0] = insn;
12175 ix86_sched_data.ppro.decode[1] = NULL;
12176 ix86_sched_data.ppro.decode[2] = NULL;
12180 for (i = 0; i < 3; ++i)
12181 if (ix86_sched_data.ppro.decode[i] == NULL)
12183 ix86_sched_data.ppro.decode[i] = insn;
12191 ix86_dump_ppro_packet (dump);
12192 ix86_sched_data.ppro.decode[0] = NULL;
12193 ix86_sched_data.ppro.decode[1] = NULL;
12194 ix86_sched_data.ppro.decode[2] = NULL;
12198 return --ix86_sched_data.ppro.issued_this_cycle;
12203 ia32_use_dfa_pipeline_interface ()
12205 if (TARGET_PENTIUM || TARGET_ATHLON_K8)
12210 /* How many alternative schedules to try. This should be as wide as the
12211 scheduling freedom in the DFA, but no wider. Making this value too
12212 large results extra work for the scheduler. */
12215 ia32_multipass_dfa_lookahead ()
12217 if (ix86_cpu == PROCESSOR_PENTIUM)
12224 /* Walk through INSNS and look for MEM references whose address is DSTREG or
12225 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
12229 ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
12231 rtx dstref, srcref, dstreg, srcreg;
12235 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
12237 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
12241 /* Subroutine of above to actually do the updating by recursively walking
12245 ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
12247 rtx dstref, srcref, dstreg, srcreg;
12249 enum rtx_code code = GET_CODE (x);
12250 const char *format_ptr = GET_RTX_FORMAT (code);
12253 if (code == MEM && XEXP (x, 0) == dstreg)
12254 MEM_COPY_ATTRIBUTES (x, dstref);
12255 else if (code == MEM && XEXP (x, 0) == srcreg)
12256 MEM_COPY_ATTRIBUTES (x, srcref);
12258 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
12260 if (*format_ptr == 'e')
12261 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
12263 else if (*format_ptr == 'E')
12264 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12265 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
12270 /* Compute the alignment given to a constant that is being placed in memory.
12271 EXP is the constant and ALIGN is the alignment that the object would
12273 The value of this function is used instead of that alignment to align
12277 ix86_constant_alignment (exp, align)
12281 if (TREE_CODE (exp) == REAL_CST)
12283 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12285 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12288 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
12295 /* Compute the alignment for a static variable.
12296 TYPE is the data type, and ALIGN is the alignment that
12297 the object would ordinarily have. The value of this function is used
12298 instead of that alignment to align the object. */
12301 ix86_data_alignment (type, align)
12305 if (AGGREGATE_TYPE_P (type)
12306 && TYPE_SIZE (type)
12307 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12308 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12309 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12312 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12313 to 16byte boundary. */
12316 if (AGGREGATE_TYPE_P (type)
12317 && TYPE_SIZE (type)
12318 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12319 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12320 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12324 if (TREE_CODE (type) == ARRAY_TYPE)
12326 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12328 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12331 else if (TREE_CODE (type) == COMPLEX_TYPE)
12334 if (TYPE_MODE (type) == DCmode && align < 64)
12336 if (TYPE_MODE (type) == XCmode && align < 128)
12339 else if ((TREE_CODE (type) == RECORD_TYPE
12340 || TREE_CODE (type) == UNION_TYPE
12341 || TREE_CODE (type) == QUAL_UNION_TYPE)
12342 && TYPE_FIELDS (type))
12344 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12346 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12349 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12350 || TREE_CODE (type) == INTEGER_TYPE)
12352 if (TYPE_MODE (type) == DFmode && align < 64)
12354 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12361 /* Compute the alignment for a local variable.
12362 TYPE is the data type, and ALIGN is the alignment that
12363 the object would ordinarily have. The value of this macro is used
12364 instead of that alignment to align the object. */
12367 ix86_local_alignment (type, align)
12371 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
12372 to 16byte boundary. */
12375 if (AGGREGATE_TYPE_P (type)
12376 && TYPE_SIZE (type)
12377 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12378 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12379 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12382 if (TREE_CODE (type) == ARRAY_TYPE)
12384 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12386 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12389 else if (TREE_CODE (type) == COMPLEX_TYPE)
12391 if (TYPE_MODE (type) == DCmode && align < 64)
12393 if (TYPE_MODE (type) == XCmode && align < 128)
12396 else if ((TREE_CODE (type) == RECORD_TYPE
12397 || TREE_CODE (type) == UNION_TYPE
12398 || TREE_CODE (type) == QUAL_UNION_TYPE)
12399 && TYPE_FIELDS (type))
12401 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12403 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12406 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12407 || TREE_CODE (type) == INTEGER_TYPE)
12410 if (TYPE_MODE (type) == DFmode && align < 64)
12412 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12418 /* Emit RTL insns to initialize the variable parts of a trampoline.
12419 FNADDR is an RTX for the address of the function's pure code.
12420 CXT is an RTX for the static chain value for the function. */
12422 x86_initialize_trampoline (tramp, fnaddr, cxt)
12423 rtx tramp, fnaddr, cxt;
12427 /* Compute offset from the end of the jmp to the target function. */
12428 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12429 plus_constant (tramp, 10),
12430 NULL_RTX, 1, OPTAB_DIRECT);
12431 emit_move_insn (gen_rtx_MEM (QImode, tramp),
12432 gen_int_mode (0xb9, QImode));
12433 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12434 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12435 gen_int_mode (0xe9, QImode));
12436 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12441 /* Try to load address using shorter movl instead of movabs.
12442 We may want to support movq for kernel mode, but kernel does not use
12443 trampolines at the moment. */
12444 if (x86_64_zero_extended_value (fnaddr))
12446 fnaddr = copy_to_mode_reg (DImode, fnaddr);
12447 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12448 gen_int_mode (0xbb41, HImode));
12449 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12450 gen_lowpart (SImode, fnaddr));
12455 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12456 gen_int_mode (0xbb49, HImode));
12457 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12461 /* Load static chain using movabs to r10. */
12462 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12463 gen_int_mode (0xba49, HImode));
12464 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12467 /* Jump to the r11 */
12468 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12469 gen_int_mode (0xff49, HImode));
12470 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12471 gen_int_mode (0xe3, QImode));
12473 if (offset > TRAMPOLINE_SIZE)
12477 #ifdef TRANSFER_FROM_TRAMPOLINE
12478 emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
12479 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12483 #define def_builtin(MASK, NAME, TYPE, CODE) \
12485 if ((MASK) & target_flags) \
12486 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
12487 NULL, NULL_TREE); \
12490 struct builtin_description
12492 const unsigned int mask;
12493 const enum insn_code icode;
12494 const char *const name;
12495 const enum ix86_builtins code;
12496 const enum rtx_code comparison;
12497 const unsigned int flag;
12500 /* Used for builtins that are enabled both by -msse and -msse2. */
12501 #define MASK_SSE1 (MASK_SSE | MASK_SSE2)
12503 static const struct builtin_description bdesc_comi[] =
12505 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12506 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12507 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12508 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12509 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12510 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12511 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12512 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12513 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12514 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12515 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12516 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12517 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12518 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12519 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12520 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12521 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12522 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12523 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12524 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12525 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12526 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12527 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12528 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12531 static const struct builtin_description bdesc_2arg[] =
12534 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12535 { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12536 { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12537 { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12538 { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12539 { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12540 { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12541 { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12543 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12544 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12545 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12546 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12547 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12548 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12549 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12550 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12551 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12552 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12553 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12554 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12555 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12556 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12557 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12558 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12559 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12560 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12561 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12562 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12564 { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12565 { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12566 { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12567 { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12569 { MASK_SSE1, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12570 { MASK_SSE1, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12571 { MASK_SSE1, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12572 { MASK_SSE1, CODE_FOR_sse_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12574 { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12575 { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12576 { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12577 { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12578 { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12581 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12582 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12583 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12584 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12585 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12586 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12588 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12589 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12590 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12591 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12592 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12593 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12594 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12595 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12597 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12598 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12599 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12601 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12602 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12603 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12604 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12606 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12607 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12609 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12610 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12611 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12612 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12613 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12614 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12616 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12617 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12618 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12619 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12621 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12622 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12623 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12624 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12625 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12626 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12629 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12630 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12631 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12633 { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12634 { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12636 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12637 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12638 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12639 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12640 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12641 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
12643 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
12644 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
12645 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
12646 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
12647 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
12648 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
12650 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
12651 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
12652 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
12653 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
12655 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
12656 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
12659 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
12660 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
12661 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
12662 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
12663 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
12664 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
12665 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
12666 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
12668 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
12669 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
12670 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
12671 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
12672 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
12673 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
12674 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
12675 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
12676 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
12677 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
12678 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
12679 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
12680 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
12681 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
12682 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
12683 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
12684 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
12685 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
12686 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
12687 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
12689 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
12690 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
12691 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
12692 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
12694 { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
12695 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
12696 { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
12697 { MASK_SSE2, CODE_FOR_sse2_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
12699 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
12700 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
12701 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
12704 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
12705 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
12706 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
12707 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
12708 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
12709 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
12710 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
12711 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
12713 { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
12714 { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
12715 { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
12716 { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
12717 { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
12718 { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
12719 { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
12720 { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
12722 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
12723 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
12724 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
12725 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
12727 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
12728 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
12729 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
12730 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
12732 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
12733 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
12735 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
12736 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
12737 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
12738 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
12739 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
12740 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
12742 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
12743 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
12744 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
12745 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
12747 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
12748 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
12749 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
12750 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
12751 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
12752 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
12753 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
12754 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
12756 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12757 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12758 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12760 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12761 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12763 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12764 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12765 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12766 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12767 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12768 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12770 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12771 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12772 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12773 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12774 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12775 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12777 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12778 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12779 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12780 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12782 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12784 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12785 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12786 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
12789 static const struct builtin_description bdesc_1arg[] =
12791 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12792 { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12794 { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12795 { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12796 { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12798 { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12799 { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12800 { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12801 { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12803 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12804 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12805 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
12806 { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
12808 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12810 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12811 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
12813 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12814 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12815 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12816 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12817 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
12819 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
12821 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12822 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12824 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12825 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
12826 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
12828 { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 }
12832 ix86_init_builtins ()
12835 ix86_init_mmx_sse_builtins ();
12838 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
12839 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
12842 ix86_init_mmx_sse_builtins ()
12844 const struct builtin_description * d;
12847 tree pchar_type_node = build_pointer_type (char_type_node);
12848 tree pcchar_type_node = build_pointer_type (
12849 build_type_variant (char_type_node, 1, 0));
12850 tree pfloat_type_node = build_pointer_type (float_type_node);
12851 tree pcfloat_type_node = build_pointer_type (
12852 build_type_variant (float_type_node, 1, 0));
12853 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
12854 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
12855 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
12858 tree int_ftype_v4sf_v4sf
12859 = build_function_type_list (integer_type_node,
12860 V4SF_type_node, V4SF_type_node, NULL_TREE);
12861 tree v4si_ftype_v4sf_v4sf
12862 = build_function_type_list (V4SI_type_node,
12863 V4SF_type_node, V4SF_type_node, NULL_TREE);
12864 /* MMX/SSE/integer conversions. */
12865 tree int_ftype_v4sf
12866 = build_function_type_list (integer_type_node,
12867 V4SF_type_node, NULL_TREE);
12868 tree int_ftype_v8qi
12869 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
12870 tree v4sf_ftype_v4sf_int
12871 = build_function_type_list (V4SF_type_node,
12872 V4SF_type_node, integer_type_node, NULL_TREE);
12873 tree v4sf_ftype_v4sf_v2si
12874 = build_function_type_list (V4SF_type_node,
12875 V4SF_type_node, V2SI_type_node, NULL_TREE);
12876 tree int_ftype_v4hi_int
12877 = build_function_type_list (integer_type_node,
12878 V4HI_type_node, integer_type_node, NULL_TREE);
12879 tree v4hi_ftype_v4hi_int_int
12880 = build_function_type_list (V4HI_type_node, V4HI_type_node,
12881 integer_type_node, integer_type_node,
12883 /* Miscellaneous. */
12884 tree v8qi_ftype_v4hi_v4hi
12885 = build_function_type_list (V8QI_type_node,
12886 V4HI_type_node, V4HI_type_node, NULL_TREE);
12887 tree v4hi_ftype_v2si_v2si
12888 = build_function_type_list (V4HI_type_node,
12889 V2SI_type_node, V2SI_type_node, NULL_TREE);
12890 tree v4sf_ftype_v4sf_v4sf_int
12891 = build_function_type_list (V4SF_type_node,
12892 V4SF_type_node, V4SF_type_node,
12893 integer_type_node, NULL_TREE);
12894 tree v2si_ftype_v4hi_v4hi
12895 = build_function_type_list (V2SI_type_node,
12896 V4HI_type_node, V4HI_type_node, NULL_TREE);
12897 tree v4hi_ftype_v4hi_int
12898 = build_function_type_list (V4HI_type_node,
12899 V4HI_type_node, integer_type_node, NULL_TREE);
12900 tree v4hi_ftype_v4hi_di
12901 = build_function_type_list (V4HI_type_node,
12902 V4HI_type_node, long_long_unsigned_type_node,
12904 tree v2si_ftype_v2si_di
12905 = build_function_type_list (V2SI_type_node,
12906 V2SI_type_node, long_long_unsigned_type_node,
12908 tree void_ftype_void
12909 = build_function_type (void_type_node, void_list_node);
12910 tree void_ftype_unsigned
12911 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
12912 tree unsigned_ftype_void
12913 = build_function_type (unsigned_type_node, void_list_node);
12915 = build_function_type (long_long_unsigned_type_node, void_list_node);
12916 tree v4sf_ftype_void
12917 = build_function_type (V4SF_type_node, void_list_node);
12918 tree v2si_ftype_v4sf
12919 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
12920 /* Loads/stores. */
12921 tree void_ftype_v8qi_v8qi_pchar
12922 = build_function_type_list (void_type_node,
12923 V8QI_type_node, V8QI_type_node,
12924 pchar_type_node, NULL_TREE);
12925 tree v4sf_ftype_pcfloat
12926 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
12927 /* @@@ the type is bogus */
12928 tree v4sf_ftype_v4sf_pv2si
12929 = build_function_type_list (V4SF_type_node,
12930 V4SF_type_node, pv2si_type_node, NULL_TREE);
12931 tree void_ftype_pv2si_v4sf
12932 = build_function_type_list (void_type_node,
12933 pv2si_type_node, V4SF_type_node, NULL_TREE);
12934 tree void_ftype_pfloat_v4sf
12935 = build_function_type_list (void_type_node,
12936 pfloat_type_node, V4SF_type_node, NULL_TREE);
12937 tree void_ftype_pdi_di
12938 = build_function_type_list (void_type_node,
12939 pdi_type_node, long_long_unsigned_type_node,
12941 tree void_ftype_pv2di_v2di
12942 = build_function_type_list (void_type_node,
12943 pv2di_type_node, V2DI_type_node, NULL_TREE);
12944 /* Normal vector unops. */
12945 tree v4sf_ftype_v4sf
12946 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
12948 /* Normal vector binops. */
12949 tree v4sf_ftype_v4sf_v4sf
12950 = build_function_type_list (V4SF_type_node,
12951 V4SF_type_node, V4SF_type_node, NULL_TREE);
12952 tree v8qi_ftype_v8qi_v8qi
12953 = build_function_type_list (V8QI_type_node,
12954 V8QI_type_node, V8QI_type_node, NULL_TREE);
12955 tree v4hi_ftype_v4hi_v4hi
12956 = build_function_type_list (V4HI_type_node,
12957 V4HI_type_node, V4HI_type_node, NULL_TREE);
12958 tree v2si_ftype_v2si_v2si
12959 = build_function_type_list (V2SI_type_node,
12960 V2SI_type_node, V2SI_type_node, NULL_TREE);
12961 tree di_ftype_di_di
12962 = build_function_type_list (long_long_unsigned_type_node,
12963 long_long_unsigned_type_node,
12964 long_long_unsigned_type_node, NULL_TREE);
12966 tree v2si_ftype_v2sf
12967 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
12968 tree v2sf_ftype_v2si
12969 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
12970 tree v2si_ftype_v2si
12971 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
12972 tree v2sf_ftype_v2sf
12973 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
12974 tree v2sf_ftype_v2sf_v2sf
12975 = build_function_type_list (V2SF_type_node,
12976 V2SF_type_node, V2SF_type_node, NULL_TREE);
12977 tree v2si_ftype_v2sf_v2sf
12978 = build_function_type_list (V2SI_type_node,
12979 V2SF_type_node, V2SF_type_node, NULL_TREE);
12980 tree pint_type_node = build_pointer_type (integer_type_node);
12981 tree pcint_type_node = build_pointer_type (
12982 build_type_variant (integer_type_node, 1, 0));
12983 tree pdouble_type_node = build_pointer_type (double_type_node);
12984 tree pcdouble_type_node = build_pointer_type (
12985 build_type_variant (double_type_node, 1, 0));
12986 tree int_ftype_v2df_v2df
12987 = build_function_type_list (integer_type_node,
12988 V2DF_type_node, V2DF_type_node, NULL_TREE);
12991 = build_function_type (intTI_type_node, void_list_node);
12992 tree v2di_ftype_void
12993 = build_function_type (V2DI_type_node, void_list_node);
12994 tree ti_ftype_ti_ti
12995 = build_function_type_list (intTI_type_node,
12996 intTI_type_node, intTI_type_node, NULL_TREE);
12997 tree void_ftype_pcvoid
12998 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
13000 = build_function_type_list (V2DI_type_node,
13001 long_long_unsigned_type_node, NULL_TREE);
13003 = build_function_type_list (long_long_unsigned_type_node,
13004 V2DI_type_node, NULL_TREE);
13005 tree v4sf_ftype_v4si
13006 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
13007 tree v4si_ftype_v4sf
13008 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
13009 tree v2df_ftype_v4si
13010 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
13011 tree v4si_ftype_v2df
13012 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
13013 tree v2si_ftype_v2df
13014 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
13015 tree v4sf_ftype_v2df
13016 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
13017 tree v2df_ftype_v2si
13018 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
13019 tree v2df_ftype_v4sf
13020 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
13021 tree int_ftype_v2df
13022 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
13023 tree v2df_ftype_v2df_int
13024 = build_function_type_list (V2DF_type_node,
13025 V2DF_type_node, integer_type_node, NULL_TREE);
13026 tree v4sf_ftype_v4sf_v2df
13027 = build_function_type_list (V4SF_type_node,
13028 V4SF_type_node, V2DF_type_node, NULL_TREE);
13029 tree v2df_ftype_v2df_v4sf
13030 = build_function_type_list (V2DF_type_node,
13031 V2DF_type_node, V4SF_type_node, NULL_TREE);
13032 tree v2df_ftype_v2df_v2df_int
13033 = build_function_type_list (V2DF_type_node,
13034 V2DF_type_node, V2DF_type_node,
13037 tree v2df_ftype_v2df_pv2si
13038 = build_function_type_list (V2DF_type_node,
13039 V2DF_type_node, pv2si_type_node, NULL_TREE);
13040 tree void_ftype_pv2si_v2df
13041 = build_function_type_list (void_type_node,
13042 pv2si_type_node, V2DF_type_node, NULL_TREE);
13043 tree void_ftype_pdouble_v2df
13044 = build_function_type_list (void_type_node,
13045 pdouble_type_node, V2DF_type_node, NULL_TREE);
13046 tree void_ftype_pint_int
13047 = build_function_type_list (void_type_node,
13048 pint_type_node, integer_type_node, NULL_TREE);
13049 tree void_ftype_v16qi_v16qi_pchar
13050 = build_function_type_list (void_type_node,
13051 V16QI_type_node, V16QI_type_node,
13052 pchar_type_node, NULL_TREE);
13053 tree v2df_ftype_pcdouble
13054 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
13055 tree v2df_ftype_v2df_v2df
13056 = build_function_type_list (V2DF_type_node,
13057 V2DF_type_node, V2DF_type_node, NULL_TREE);
13058 tree v16qi_ftype_v16qi_v16qi
13059 = build_function_type_list (V16QI_type_node,
13060 V16QI_type_node, V16QI_type_node, NULL_TREE);
13061 tree v8hi_ftype_v8hi_v8hi
13062 = build_function_type_list (V8HI_type_node,
13063 V8HI_type_node, V8HI_type_node, NULL_TREE);
13064 tree v4si_ftype_v4si_v4si
13065 = build_function_type_list (V4SI_type_node,
13066 V4SI_type_node, V4SI_type_node, NULL_TREE);
13067 tree v2di_ftype_v2di_v2di
13068 = build_function_type_list (V2DI_type_node,
13069 V2DI_type_node, V2DI_type_node, NULL_TREE);
13070 tree v2di_ftype_v2df_v2df
13071 = build_function_type_list (V2DI_type_node,
13072 V2DF_type_node, V2DF_type_node, NULL_TREE);
13073 tree v2df_ftype_v2df
13074 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13075 tree v2df_ftype_double
13076 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
13077 tree v2df_ftype_double_double
13078 = build_function_type_list (V2DF_type_node,
13079 double_type_node, double_type_node, NULL_TREE);
13080 tree int_ftype_v8hi_int
13081 = build_function_type_list (integer_type_node,
13082 V8HI_type_node, integer_type_node, NULL_TREE);
13083 tree v8hi_ftype_v8hi_int_int
13084 = build_function_type_list (V8HI_type_node,
13085 V8HI_type_node, integer_type_node,
13086 integer_type_node, NULL_TREE);
13087 tree v2di_ftype_v2di_int
13088 = build_function_type_list (V2DI_type_node,
13089 V2DI_type_node, integer_type_node, NULL_TREE);
13090 tree v4si_ftype_v4si_int
13091 = build_function_type_list (V4SI_type_node,
13092 V4SI_type_node, integer_type_node, NULL_TREE);
13093 tree v8hi_ftype_v8hi_int
13094 = build_function_type_list (V8HI_type_node,
13095 V8HI_type_node, integer_type_node, NULL_TREE);
13096 tree v8hi_ftype_v8hi_v2di
13097 = build_function_type_list (V8HI_type_node,
13098 V8HI_type_node, V2DI_type_node, NULL_TREE);
13099 tree v4si_ftype_v4si_v2di
13100 = build_function_type_list (V4SI_type_node,
13101 V4SI_type_node, V2DI_type_node, NULL_TREE);
13102 tree v4si_ftype_v8hi_v8hi
13103 = build_function_type_list (V4SI_type_node,
13104 V8HI_type_node, V8HI_type_node, NULL_TREE);
13105 tree di_ftype_v8qi_v8qi
13106 = build_function_type_list (long_long_unsigned_type_node,
13107 V8QI_type_node, V8QI_type_node, NULL_TREE);
13108 tree v2di_ftype_v16qi_v16qi
13109 = build_function_type_list (V2DI_type_node,
13110 V16QI_type_node, V16QI_type_node, NULL_TREE);
13111 tree int_ftype_v16qi
13112 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
13113 tree v16qi_ftype_pcchar
13114 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
13115 tree void_ftype_pchar_v16qi
13116 = build_function_type_list (void_type_node,
13117 pchar_type_node, V16QI_type_node, NULL_TREE);
13118 tree v4si_ftype_pcint
13119 = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
13120 tree void_ftype_pcint_v4si
13121 = build_function_type_list (void_type_node,
13122 pcint_type_node, V4SI_type_node, NULL_TREE);
13123 tree v2di_ftype_v2di
13124 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
13126 /* Add all builtins that are more or less simple operations on two
13128 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13130 /* Use one of the operands; the target can have a different mode for
13131 mask-generating compares. */
13132 enum machine_mode mode;
13137 mode = insn_data[d->icode].operand[1].mode;
13142 type = v16qi_ftype_v16qi_v16qi;
13145 type = v8hi_ftype_v8hi_v8hi;
13148 type = v4si_ftype_v4si_v4si;
13151 type = v2di_ftype_v2di_v2di;
13154 type = v2df_ftype_v2df_v2df;
13157 type = ti_ftype_ti_ti;
13160 type = v4sf_ftype_v4sf_v4sf;
13163 type = v8qi_ftype_v8qi_v8qi;
13166 type = v4hi_ftype_v4hi_v4hi;
13169 type = v2si_ftype_v2si_v2si;
13172 type = di_ftype_di_di;
13179 /* Override for comparisons. */
13180 if (d->icode == CODE_FOR_maskcmpv4sf3
13181 || d->icode == CODE_FOR_maskncmpv4sf3
13182 || d->icode == CODE_FOR_vmmaskcmpv4sf3
13183 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
13184 type = v4si_ftype_v4sf_v4sf;
13186 if (d->icode == CODE_FOR_maskcmpv2df3
13187 || d->icode == CODE_FOR_maskncmpv2df3
13188 || d->icode == CODE_FOR_vmmaskcmpv2df3
13189 || d->icode == CODE_FOR_vmmaskncmpv2df3)
13190 type = v2di_ftype_v2df_v2df;
13192 def_builtin (d->mask, d->name, type, d->code);
13195 /* Add the remaining MMX insns with somewhat more complicated types. */
13196 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13197 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13198 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13199 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13200 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13202 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13203 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13204 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13206 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13207 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13209 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13210 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
13212 /* comi/ucomi insns. */
13213 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13214 if (d->mask == MASK_SSE2)
13215 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13217 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
13219 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13220 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13221 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13223 def_builtin (MASK_SSE1, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13224 def_builtin (MASK_SSE1, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13225 def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13226 def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13227 def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13228 def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13229 def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13230 def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13232 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13233 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13235 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13237 def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13238 def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13239 def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13240 def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13241 def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13242 def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13244 def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13245 def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13246 def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13247 def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13249 def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13250 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13251 def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13252 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13254 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13256 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13258 def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13259 def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13260 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13261 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13262 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13263 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13265 def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13267 /* Original 3DNow! */
13268 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13269 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13270 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13271 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13272 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13273 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13274 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13275 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13276 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13277 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13278 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13279 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13280 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13281 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13282 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13283 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13284 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13285 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13286 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13287 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13289 /* 3DNow! extension as used in the Athlon CPU. */
13290 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13291 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13292 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13293 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13294 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13295 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13297 def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13300 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13301 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13303 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13304 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13305 def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13307 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13308 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13309 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
13310 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13311 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13312 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13314 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13315 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13316 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13317 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13319 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13320 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13321 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13322 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13323 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13325 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13326 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13327 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13328 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13330 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13331 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13333 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13335 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13336 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13338 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13339 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13340 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13341 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13342 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13344 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13346 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13347 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13349 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13350 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13351 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13353 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13354 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13355 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13357 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13358 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13359 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13360 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13361 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13362 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13363 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13365 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13366 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13367 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13369 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13370 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13371 def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13372 def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13373 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13374 def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13375 def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13377 def_builtin (MASK_SSE1, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13379 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13380 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13381 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13383 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13384 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13385 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13387 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13388 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13390 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13391 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13392 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13393 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13395 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13396 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13397 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13398 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13400 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13401 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13403 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13406 /* Errors in the source file can cause expand_expr to return const0_rtx
13407 where we expect a vector. To avoid crashing, use one of the vector
13408 clear instructions. */
13410 safe_vector_operand (x, mode)
13412 enum machine_mode mode;
13414 if (x != const0_rtx)
13416 x = gen_reg_rtx (mode);
13418 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13419 emit_insn (gen_mmx_clrdi (mode == DImode ? x
13420 : gen_rtx_SUBREG (DImode, x, 0)));
13422 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13423 : gen_rtx_SUBREG (V4SFmode, x, 0),
13424 CONST0_RTX (V4SFmode)));
13428 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
13431 ix86_expand_binop_builtin (icode, arglist, target)
13432 enum insn_code icode;
13437 tree arg0 = TREE_VALUE (arglist);
13438 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13439 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13440 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13441 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13442 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13443 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13445 if (VECTOR_MODE_P (mode0))
13446 op0 = safe_vector_operand (op0, mode0);
13447 if (VECTOR_MODE_P (mode1))
13448 op1 = safe_vector_operand (op1, mode1);
13451 || GET_MODE (target) != tmode
13452 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13453 target = gen_reg_rtx (tmode);
13455 /* In case the insn wants input operands in modes different from
13456 the result, abort. */
13457 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
13460 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13461 op0 = copy_to_mode_reg (mode0, op0);
13462 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13463 op1 = copy_to_mode_reg (mode1, op1);
13465 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13466 yet one of the two must not be a memory. This is normally enforced
13467 by expanders, but we didn't bother to create one here. */
13468 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13469 op0 = copy_to_mode_reg (mode0, op0);
13471 pat = GEN_FCN (icode) (target, op0, op1);
13478 /* Subroutine of ix86_expand_builtin to take care of stores. */
13481 ix86_expand_store_builtin (icode, arglist)
13482 enum insn_code icode;
13486 tree arg0 = TREE_VALUE (arglist);
13487 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13488 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13489 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13490 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13491 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13493 if (VECTOR_MODE_P (mode1))
13494 op1 = safe_vector_operand (op1, mode1);
13496 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13498 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13499 op1 = copy_to_mode_reg (mode1, op1);
13501 pat = GEN_FCN (icode) (op0, op1);
13507 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
13510 ix86_expand_unop_builtin (icode, arglist, target, do_load)
13511 enum insn_code icode;
13517 tree arg0 = TREE_VALUE (arglist);
13518 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13519 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13520 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13523 || GET_MODE (target) != tmode
13524 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13525 target = gen_reg_rtx (tmode);
13527 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13530 if (VECTOR_MODE_P (mode0))
13531 op0 = safe_vector_operand (op0, mode0);
13533 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13534 op0 = copy_to_mode_reg (mode0, op0);
13537 pat = GEN_FCN (icode) (target, op0);
13544 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13545 sqrtss, rsqrtss, rcpss. */
13548 ix86_expand_unop1_builtin (icode, arglist, target)
13549 enum insn_code icode;
13554 tree arg0 = TREE_VALUE (arglist);
13555 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13556 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13557 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13560 || GET_MODE (target) != tmode
13561 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13562 target = gen_reg_rtx (tmode);
13564 if (VECTOR_MODE_P (mode0))
13565 op0 = safe_vector_operand (op0, mode0);
13567 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13568 op0 = copy_to_mode_reg (mode0, op0);
13571 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
13572 op1 = copy_to_mode_reg (mode0, op1);
13574 pat = GEN_FCN (icode) (target, op0, op1);
13581 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
13584 ix86_expand_sse_compare (d, arglist, target)
13585 const struct builtin_description *d;
13590 tree arg0 = TREE_VALUE (arglist);
13591 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13592 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13593 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13595 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
13596 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
13597 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
13598 enum rtx_code comparison = d->comparison;
13600 if (VECTOR_MODE_P (mode0))
13601 op0 = safe_vector_operand (op0, mode0);
13602 if (VECTOR_MODE_P (mode1))
13603 op1 = safe_vector_operand (op1, mode1);
13605 /* Swap operands if we have a comparison that isn't available in
13609 rtx tmp = gen_reg_rtx (mode1);
13610 emit_move_insn (tmp, op1);
13616 || GET_MODE (target) != tmode
13617 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
13618 target = gen_reg_rtx (tmode);
13620 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
13621 op0 = copy_to_mode_reg (mode0, op0);
13622 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
13623 op1 = copy_to_mode_reg (mode1, op1);
13625 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13626 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
13633 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
13636 ix86_expand_sse_comi (d, arglist, target)
13637 const struct builtin_description *d;
13642 tree arg0 = TREE_VALUE (arglist);
13643 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13644 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13645 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13647 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
13648 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
13649 enum rtx_code comparison = d->comparison;
13651 if (VECTOR_MODE_P (mode0))
13652 op0 = safe_vector_operand (op0, mode0);
13653 if (VECTOR_MODE_P (mode1))
13654 op1 = safe_vector_operand (op1, mode1);
13656 /* Swap operands if we have a comparison that isn't available in
13665 target = gen_reg_rtx (SImode);
13666 emit_move_insn (target, const0_rtx);
13667 target = gen_rtx_SUBREG (QImode, target, 0);
13669 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13670 op0 = copy_to_mode_reg (mode0, op0);
13671 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13672 op1 = copy_to_mode_reg (mode1, op1);
13674 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
13675 pat = GEN_FCN (d->icode) (op0, op1);
13679 emit_insn (gen_rtx_SET (VOIDmode,
13680 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
13681 gen_rtx_fmt_ee (comparison, QImode,
13685 return SUBREG_REG (target);
13688 /* Expand an expression EXP that calls a built-in function,
13689 with result going to TARGET if that's convenient
13690 (and in mode MODE if that's convenient).
13691 SUBTARGET may be used as the target for computing one of EXP's operands.
13692 IGNORE is nonzero if the value is to be ignored. */
13695 ix86_expand_builtin (exp, target, subtarget, mode, ignore)
13698 rtx subtarget ATTRIBUTE_UNUSED;
13699 enum machine_mode mode ATTRIBUTE_UNUSED;
13700 int ignore ATTRIBUTE_UNUSED;
13702 const struct builtin_description *d;
13704 enum insn_code icode;
13705 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
13706 tree arglist = TREE_OPERAND (exp, 1);
13707 tree arg0, arg1, arg2;
13708 rtx op0, op1, op2, pat;
13709 enum machine_mode tmode, mode0, mode1, mode2;
13710 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13714 case IX86_BUILTIN_EMMS:
13715 emit_insn (gen_emms ());
13718 case IX86_BUILTIN_SFENCE:
13719 emit_insn (gen_sfence ());
13722 case IX86_BUILTIN_PEXTRW:
13723 case IX86_BUILTIN_PEXTRW128:
13724 icode = (fcode == IX86_BUILTIN_PEXTRW
13725 ? CODE_FOR_mmx_pextrw
13726 : CODE_FOR_sse2_pextrw);
13727 arg0 = TREE_VALUE (arglist);
13728 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13729 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13730 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13731 tmode = insn_data[icode].operand[0].mode;
13732 mode0 = insn_data[icode].operand[1].mode;
13733 mode1 = insn_data[icode].operand[2].mode;
13735 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13736 op0 = copy_to_mode_reg (mode0, op0);
13737 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13739 /* @@@ better error message */
13740 error ("selector must be an immediate");
13741 return gen_reg_rtx (tmode);
13744 || GET_MODE (target) != tmode
13745 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13746 target = gen_reg_rtx (tmode);
13747 pat = GEN_FCN (icode) (target, op0, op1);
13753 case IX86_BUILTIN_PINSRW:
13754 case IX86_BUILTIN_PINSRW128:
13755 icode = (fcode == IX86_BUILTIN_PINSRW
13756 ? CODE_FOR_mmx_pinsrw
13757 : CODE_FOR_sse2_pinsrw);
13758 arg0 = TREE_VALUE (arglist);
13759 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13760 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13761 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13762 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13763 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13764 tmode = insn_data[icode].operand[0].mode;
13765 mode0 = insn_data[icode].operand[1].mode;
13766 mode1 = insn_data[icode].operand[2].mode;
13767 mode2 = insn_data[icode].operand[3].mode;
13769 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13770 op0 = copy_to_mode_reg (mode0, op0);
13771 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13772 op1 = copy_to_mode_reg (mode1, op1);
13773 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13775 /* @@@ better error message */
13776 error ("selector must be an immediate");
13780 || GET_MODE (target) != tmode
13781 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13782 target = gen_reg_rtx (tmode);
13783 pat = GEN_FCN (icode) (target, op0, op1, op2);
13789 case IX86_BUILTIN_MASKMOVQ:
13790 case IX86_BUILTIN_MASKMOVDQU:
13791 icode = (fcode == IX86_BUILTIN_MASKMOVQ
13792 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
13793 : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
13794 : CODE_FOR_sse2_maskmovdqu));
13795 /* Note the arg order is different from the operand order. */
13796 arg1 = TREE_VALUE (arglist);
13797 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
13798 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13799 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13800 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13801 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13802 mode0 = insn_data[icode].operand[0].mode;
13803 mode1 = insn_data[icode].operand[1].mode;
13804 mode2 = insn_data[icode].operand[2].mode;
13806 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13807 op0 = copy_to_mode_reg (mode0, op0);
13808 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13809 op1 = copy_to_mode_reg (mode1, op1);
13810 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
13811 op2 = copy_to_mode_reg (mode2, op2);
13812 pat = GEN_FCN (icode) (op0, op1, op2);
13818 case IX86_BUILTIN_SQRTSS:
13819 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
13820 case IX86_BUILTIN_RSQRTSS:
13821 return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
13822 case IX86_BUILTIN_RCPSS:
13823 return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
13825 case IX86_BUILTIN_LOADAPS:
13826 return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
13828 case IX86_BUILTIN_LOADUPS:
13829 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
13831 case IX86_BUILTIN_STOREAPS:
13832 return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
13834 case IX86_BUILTIN_STOREUPS:
13835 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
13837 case IX86_BUILTIN_LOADSS:
13838 return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
13840 case IX86_BUILTIN_STORESS:
13841 return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
13843 case IX86_BUILTIN_LOADHPS:
13844 case IX86_BUILTIN_LOADLPS:
13845 case IX86_BUILTIN_LOADHPD:
13846 case IX86_BUILTIN_LOADLPD:
13847 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
13848 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
13849 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
13850 : CODE_FOR_sse2_movlpd);
13851 arg0 = TREE_VALUE (arglist);
13852 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13853 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13854 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13855 tmode = insn_data[icode].operand[0].mode;
13856 mode0 = insn_data[icode].operand[1].mode;
13857 mode1 = insn_data[icode].operand[2].mode;
13859 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13860 op0 = copy_to_mode_reg (mode0, op0);
13861 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
13863 || GET_MODE (target) != tmode
13864 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13865 target = gen_reg_rtx (tmode);
13866 pat = GEN_FCN (icode) (target, op0, op1);
13872 case IX86_BUILTIN_STOREHPS:
13873 case IX86_BUILTIN_STORELPS:
13874 case IX86_BUILTIN_STOREHPD:
13875 case IX86_BUILTIN_STORELPD:
13876 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
13877 : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
13878 : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
13879 : CODE_FOR_sse2_movlpd);
13880 arg0 = TREE_VALUE (arglist);
13881 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13882 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13883 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13884 mode0 = insn_data[icode].operand[1].mode;
13885 mode1 = insn_data[icode].operand[2].mode;
13887 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13888 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13889 op1 = copy_to_mode_reg (mode1, op1);
13891 pat = GEN_FCN (icode) (op0, op0, op1);
13897 case IX86_BUILTIN_MOVNTPS:
13898 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
13899 case IX86_BUILTIN_MOVNTQ:
13900 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
13902 case IX86_BUILTIN_LDMXCSR:
13903 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
13904 target = assign_386_stack_local (SImode, 0);
13905 emit_move_insn (target, op0);
13906 emit_insn (gen_ldmxcsr (target));
13909 case IX86_BUILTIN_STMXCSR:
13910 target = assign_386_stack_local (SImode, 0);
13911 emit_insn (gen_stmxcsr (target));
13912 return copy_to_mode_reg (SImode, target);
13914 case IX86_BUILTIN_SHUFPS:
13915 case IX86_BUILTIN_SHUFPD:
13916 icode = (fcode == IX86_BUILTIN_SHUFPS
13917 ? CODE_FOR_sse_shufps
13918 : CODE_FOR_sse2_shufpd);
13919 arg0 = TREE_VALUE (arglist);
13920 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13921 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13922 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13923 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13924 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13925 tmode = insn_data[icode].operand[0].mode;
13926 mode0 = insn_data[icode].operand[1].mode;
13927 mode1 = insn_data[icode].operand[2].mode;
13928 mode2 = insn_data[icode].operand[3].mode;
13930 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13931 op0 = copy_to_mode_reg (mode0, op0);
13932 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13933 op1 = copy_to_mode_reg (mode1, op1);
13934 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13936 /* @@@ better error message */
13937 error ("mask must be an immediate");
13938 return gen_reg_rtx (tmode);
13941 || GET_MODE (target) != tmode
13942 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13943 target = gen_reg_rtx (tmode);
13944 pat = GEN_FCN (icode) (target, op0, op1, op2);
13950 case IX86_BUILTIN_PSHUFW:
13951 case IX86_BUILTIN_PSHUFD:
13952 case IX86_BUILTIN_PSHUFHW:
13953 case IX86_BUILTIN_PSHUFLW:
13954 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
13955 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
13956 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
13957 : CODE_FOR_mmx_pshufw);
13958 arg0 = TREE_VALUE (arglist);
13959 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13960 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13961 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13962 tmode = insn_data[icode].operand[0].mode;
13963 mode1 = insn_data[icode].operand[1].mode;
13964 mode2 = insn_data[icode].operand[2].mode;
13966 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13967 op0 = copy_to_mode_reg (mode1, op0);
13968 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
13970 /* @@@ better error message */
13971 error ("mask must be an immediate");
13975 || GET_MODE (target) != tmode
13976 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13977 target = gen_reg_rtx (tmode);
13978 pat = GEN_FCN (icode) (target, op0, op1);
13984 case IX86_BUILTIN_PSLLDQI128:
13985 case IX86_BUILTIN_PSRLDQI128:
13986 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
13987 : CODE_FOR_sse2_lshrti3);
13988 arg0 = TREE_VALUE (arglist);
13989 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13990 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13991 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13992 tmode = insn_data[icode].operand[0].mode;
13993 mode1 = insn_data[icode].operand[1].mode;
13994 mode2 = insn_data[icode].operand[2].mode;
13996 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
13998 op0 = copy_to_reg (op0);
13999 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14001 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14003 error ("shift must be an immediate");
14006 target = gen_reg_rtx (V2DImode);
14007 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14013 case IX86_BUILTIN_FEMMS:
14014 emit_insn (gen_femms ());
14017 case IX86_BUILTIN_PAVGUSB:
14018 return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
14020 case IX86_BUILTIN_PF2ID:
14021 return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
14023 case IX86_BUILTIN_PFACC:
14024 return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
14026 case IX86_BUILTIN_PFADD:
14027 return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
14029 case IX86_BUILTIN_PFCMPEQ:
14030 return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
14032 case IX86_BUILTIN_PFCMPGE:
14033 return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
14035 case IX86_BUILTIN_PFCMPGT:
14036 return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
14038 case IX86_BUILTIN_PFMAX:
14039 return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
14041 case IX86_BUILTIN_PFMIN:
14042 return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
14044 case IX86_BUILTIN_PFMUL:
14045 return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
14047 case IX86_BUILTIN_PFRCP:
14048 return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
14050 case IX86_BUILTIN_PFRCPIT1:
14051 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
14053 case IX86_BUILTIN_PFRCPIT2:
14054 return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
14056 case IX86_BUILTIN_PFRSQIT1:
14057 return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
14059 case IX86_BUILTIN_PFRSQRT:
14060 return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
14062 case IX86_BUILTIN_PFSUB:
14063 return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
14065 case IX86_BUILTIN_PFSUBR:
14066 return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
14068 case IX86_BUILTIN_PI2FD:
14069 return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
14071 case IX86_BUILTIN_PMULHRW:
14072 return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
14074 case IX86_BUILTIN_PF2IW:
14075 return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
14077 case IX86_BUILTIN_PFNACC:
14078 return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
14080 case IX86_BUILTIN_PFPNACC:
14081 return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
14083 case IX86_BUILTIN_PI2FW:
14084 return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
14086 case IX86_BUILTIN_PSWAPDSI:
14087 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
14089 case IX86_BUILTIN_PSWAPDSF:
14090 return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
14092 case IX86_BUILTIN_SSE_ZERO:
14093 target = gen_reg_rtx (V4SFmode);
14094 emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
14097 case IX86_BUILTIN_MMX_ZERO:
14098 target = gen_reg_rtx (DImode);
14099 emit_insn (gen_mmx_clrdi (target));
14102 case IX86_BUILTIN_CLRTI:
14103 target = gen_reg_rtx (V2DImode);
14104 emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
14108 case IX86_BUILTIN_SQRTSD:
14109 return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14110 case IX86_BUILTIN_LOADAPD:
14111 return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14112 case IX86_BUILTIN_LOADUPD:
14113 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14115 case IX86_BUILTIN_STOREAPD:
14116 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14117 case IX86_BUILTIN_STOREUPD:
14118 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14120 case IX86_BUILTIN_LOADSD:
14121 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14123 case IX86_BUILTIN_STORESD:
14124 return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
14126 case IX86_BUILTIN_SETPD1:
14127 target = assign_386_stack_local (DFmode, 0);
14128 arg0 = TREE_VALUE (arglist);
14129 emit_move_insn (adjust_address (target, DFmode, 0),
14130 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14131 op0 = gen_reg_rtx (V2DFmode);
14132 emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
14133 emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
14136 case IX86_BUILTIN_SETPD:
14137 target = assign_386_stack_local (V2DFmode, 0);
14138 arg0 = TREE_VALUE (arglist);
14139 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14140 emit_move_insn (adjust_address (target, DFmode, 0),
14141 expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14142 emit_move_insn (adjust_address (target, DFmode, 8),
14143 expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14144 op0 = gen_reg_rtx (V2DFmode);
14145 emit_insn (gen_sse2_movapd (op0, target));
14148 case IX86_BUILTIN_LOADRPD:
14149 target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14150 gen_reg_rtx (V2DFmode), 1);
14151 emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
14154 case IX86_BUILTIN_LOADPD1:
14155 target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14156 gen_reg_rtx (V2DFmode), 1);
14157 emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14160 case IX86_BUILTIN_STOREPD1:
14161 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14162 case IX86_BUILTIN_STORERPD:
14163 return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14165 case IX86_BUILTIN_CLRPD:
14166 target = gen_reg_rtx (V2DFmode);
14167 emit_insn (gen_sse_clrv2df (target));
14170 case IX86_BUILTIN_MFENCE:
14171 emit_insn (gen_sse2_mfence ());
14173 case IX86_BUILTIN_LFENCE:
14174 emit_insn (gen_sse2_lfence ());
14177 case IX86_BUILTIN_CLFLUSH:
14178 arg0 = TREE_VALUE (arglist);
14179 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14180 icode = CODE_FOR_sse2_clflush;
14181 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14182 op0 = copy_to_mode_reg (Pmode, op0);
14184 emit_insn (gen_sse2_clflush (op0));
14187 case IX86_BUILTIN_MOVNTPD:
14188 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14189 case IX86_BUILTIN_MOVNTDQ:
14190 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14191 case IX86_BUILTIN_MOVNTI:
14192 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14194 case IX86_BUILTIN_LOADDQA:
14195 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14196 case IX86_BUILTIN_LOADDQU:
14197 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14198 case IX86_BUILTIN_LOADD:
14199 return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14201 case IX86_BUILTIN_STOREDQA:
14202 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14203 case IX86_BUILTIN_STOREDQU:
14204 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14205 case IX86_BUILTIN_STORED:
14206 return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
14212 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14213 if (d->code == fcode)
14215 /* Compares are treated specially. */
14216 if (d->icode == CODE_FOR_maskcmpv4sf3
14217 || d->icode == CODE_FOR_vmmaskcmpv4sf3
14218 || d->icode == CODE_FOR_maskncmpv4sf3
14219 || d->icode == CODE_FOR_vmmaskncmpv4sf3
14220 || d->icode == CODE_FOR_maskcmpv2df3
14221 || d->icode == CODE_FOR_vmmaskcmpv2df3
14222 || d->icode == CODE_FOR_maskncmpv2df3
14223 || d->icode == CODE_FOR_vmmaskncmpv2df3)
14224 return ix86_expand_sse_compare (d, arglist, target);
14226 return ix86_expand_binop_builtin (d->icode, arglist, target);
14229 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14230 if (d->code == fcode)
14231 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14233 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14234 if (d->code == fcode)
14235 return ix86_expand_sse_comi (d, arglist, target);
14237 /* @@@ Should really do something sensible here. */
14241 /* Store OPERAND to the memory after reload is completed. This means
14242 that we can't easily use assign_stack_local. */
14244 ix86_force_to_memory (mode, operand)
14245 enum machine_mode mode;
14249 if (!reload_completed)
14251 if (TARGET_64BIT && TARGET_RED_ZONE)
14253 result = gen_rtx_MEM (mode,
14254 gen_rtx_PLUS (Pmode,
14256 GEN_INT (-RED_ZONE_SIZE)));
14257 emit_move_insn (result, operand);
14259 else if (TARGET_64BIT && !TARGET_RED_ZONE)
14265 operand = gen_lowpart (DImode, operand);
14269 gen_rtx_SET (VOIDmode,
14270 gen_rtx_MEM (DImode,
14271 gen_rtx_PRE_DEC (DImode,
14272 stack_pointer_rtx)),
14278 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14287 split_di (&operand, 1, operands, operands + 1);
14289 gen_rtx_SET (VOIDmode,
14290 gen_rtx_MEM (SImode,
14291 gen_rtx_PRE_DEC (Pmode,
14292 stack_pointer_rtx)),
14295 gen_rtx_SET (VOIDmode,
14296 gen_rtx_MEM (SImode,
14297 gen_rtx_PRE_DEC (Pmode,
14298 stack_pointer_rtx)),
14303 /* It is better to store HImodes as SImodes. */
14304 if (!TARGET_PARTIAL_REG_STALL)
14305 operand = gen_lowpart (SImode, operand);
14309 gen_rtx_SET (VOIDmode,
14310 gen_rtx_MEM (GET_MODE (operand),
14311 gen_rtx_PRE_DEC (SImode,
14312 stack_pointer_rtx)),
14318 result = gen_rtx_MEM (mode, stack_pointer_rtx);
14323 /* Free operand from the memory. */
14325 ix86_free_from_memory (mode)
14326 enum machine_mode mode;
14328 if (!TARGET_64BIT || !TARGET_RED_ZONE)
14332 if (mode == DImode || TARGET_64BIT)
14334 else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14338 /* Use LEA to deallocate stack space. In peephole2 it will be converted
14339 to pop or add instruction if registers are available. */
14340 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14341 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14346 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14347 QImode must go into class Q_REGS.
14348 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
14349 movdf to do mem-to-mem moves through integer regs. */
14351 ix86_preferred_reload_class (x, class)
14353 enum reg_class class;
14355 if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14357 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14359 /* SSE can't load any constant directly yet. */
14360 if (SSE_CLASS_P (class))
14362 /* Floats can load 0 and 1. */
14363 if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14365 /* Limit class to non-SSE. Use GENERAL_REGS if possible. */
14366 if (MAYBE_SSE_CLASS_P (class))
14367 return (reg_class_subset_p (class, GENERAL_REGS)
14368 ? GENERAL_REGS : FLOAT_REGS);
14372 /* General regs can load everything. */
14373 if (reg_class_subset_p (class, GENERAL_REGS))
14374 return GENERAL_REGS;
14375 /* In case we haven't resolved FLOAT or SSE yet, give up. */
14376 if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14379 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14381 if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14386 /* If we are copying between general and FP registers, we need a memory
14387 location. The same is true for SSE and MMX registers.
14389 The macro can't work reliably when one of the CLASSES is class containing
14390 registers from multiple units (SSE, MMX, integer). We avoid this by never
14391 combining those units in single alternative in the machine description.
14392 Ensure that this constraint holds to avoid unexpected surprises.
14394 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14395 enforce these sanity checks. */
14397 ix86_secondary_memory_needed (class1, class2, mode, strict)
14398 enum reg_class class1, class2;
14399 enum machine_mode mode;
14402 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14403 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14404 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14405 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14406 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14407 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14414 return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14415 || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14416 && (mode) != SImode)
14417 || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14418 && (mode) != SImode));
14420 /* Return the cost of moving data from a register in class CLASS1 to
14421 one in class CLASS2.
14423 It is not required that the cost always equal 2 when FROM is the same as TO;
14424 on some machines it is expensive to move between registers if they are not
14425 general registers. */
14427 ix86_register_move_cost (mode, class1, class2)
14428 enum machine_mode mode;
14429 enum reg_class class1, class2;
14431 /* In case we require secondary memory, compute cost of the store followed
14432 by load. In order to avoid bad register allocation choices, we need
14433 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
14435 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14439 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14440 MEMORY_MOVE_COST (mode, class1, 1));
14441 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14442 MEMORY_MOVE_COST (mode, class2, 1));
14444 /* In case of copying from general_purpose_register we may emit multiple
14445 stores followed by single load causing memory size mismatch stall.
14446 Count this as arbitrarily high cost of 20. */
14447 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14450 /* In the case of FP/MMX moves, the registers actually overlap, and we
14451 have to switch modes in order to treat them differently. */
14452 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14453 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14459 /* Moves between SSE/MMX and integer unit are expensive. */
14460 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14461 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14462 return ix86_cost->mmxsse_to_integer;
14463 if (MAYBE_FLOAT_CLASS_P (class1))
14464 return ix86_cost->fp_move;
14465 if (MAYBE_SSE_CLASS_P (class1))
14466 return ix86_cost->sse_move;
14467 if (MAYBE_MMX_CLASS_P (class1))
14468 return ix86_cost->mmx_move;
14472 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
14474 ix86_hard_regno_mode_ok (regno, mode)
14476 enum machine_mode mode;
14478 /* Flags and only flags can only hold CCmode values. */
14479 if (CC_REGNO_P (regno))
14480 return GET_MODE_CLASS (mode) == MODE_CC;
14481 if (GET_MODE_CLASS (mode) == MODE_CC
14482 || GET_MODE_CLASS (mode) == MODE_RANDOM
14483 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14485 if (FP_REGNO_P (regno))
14486 return VALID_FP_MODE_P (mode);
14487 if (SSE_REGNO_P (regno))
14488 return VALID_SSE_REG_MODE (mode);
14489 if (MMX_REGNO_P (regno))
14490 return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
14491 /* We handle both integer and floats in the general purpose registers.
14492 In future we should be able to handle vector modes as well. */
14493 if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14495 /* Take care for QImode values - they can be in non-QI regs, but then
14496 they do cause partial register stalls. */
14497 if (regno < 4 || mode != QImode || TARGET_64BIT)
14499 return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14502 /* Return the cost of moving data of mode M between a
14503 register and memory. A value of 2 is the default; this cost is
14504 relative to those in `REGISTER_MOVE_COST'.
14506 If moving between registers and memory is more expensive than
14507 between two registers, you should define this macro to express the
14510 Model also increased moving costs of QImode registers in non
14514 ix86_memory_move_cost (mode, class, in)
14515 enum machine_mode mode;
14516 enum reg_class class;
14519 if (FLOAT_CLASS_P (class))
14537 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
14539 if (SSE_CLASS_P (class))
14542 switch (GET_MODE_SIZE (mode))
14556 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
14558 if (MMX_CLASS_P (class))
14561 switch (GET_MODE_SIZE (mode))
14572 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
14574 switch (GET_MODE_SIZE (mode))
14578 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
14579 : ix86_cost->movzbl_load);
14581 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
14582 : ix86_cost->int_store[0] + 4);
14585 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
14587 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
14588 if (mode == TFmode)
14590 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
14591 * ((int) GET_MODE_SIZE (mode)
14592 + UNITS_PER_WORD -1 ) / UNITS_PER_WORD);
14596 /* Compute a (partial) cost for rtx X. Return true if the complete
14597 cost has been computed, and false if subexpressions should be
14598 scanned. In either case, *TOTAL contains the cost result. */
14601 ix86_rtx_costs (x, code, outer_code, total)
14603 int code, outer_code;
14606 enum machine_mode mode = GET_MODE (x);
14614 if (TARGET_64BIT && !x86_64_sign_extended_value (x))
14616 else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
14618 else if (flag_pic && SYMBOLIC_CONST (x))
14625 if (mode == VOIDmode)
14628 switch (standard_80387_constant_p (x))
14637 /* Start with (MEM (SYMBOL_REF)), since that's where
14638 it'll probably end up. Add a penalty for size. */
14639 *total = (COSTS_N_INSNS (1)
14641 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
14647 /* The zero extensions is often completely free on x86_64, so make
14648 it as cheap as possible. */
14649 if (TARGET_64BIT && mode == DImode
14650 && GET_MODE (XEXP (x, 0)) == SImode)
14652 else if (TARGET_ZERO_EXTEND_WITH_AND)
14653 *total = COSTS_N_INSNS (ix86_cost->add);
14655 *total = COSTS_N_INSNS (ix86_cost->movzx);
14659 *total = COSTS_N_INSNS (ix86_cost->movsx);
14663 if (GET_CODE (XEXP (x, 1)) == CONST_INT
14664 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
14666 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14669 *total = COSTS_N_INSNS (ix86_cost->add);
14672 if ((value == 2 || value == 3)
14673 && !TARGET_DECOMPOSE_LEA
14674 && ix86_cost->lea <= ix86_cost->shift_const)
14676 *total = COSTS_N_INSNS (ix86_cost->lea);
14686 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
14688 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14690 if (INTVAL (XEXP (x, 1)) > 32)
14691 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
14693 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
14697 if (GET_CODE (XEXP (x, 1)) == AND)
14698 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
14700 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
14705 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14706 *total = COSTS_N_INSNS (ix86_cost->shift_const);
14708 *total = COSTS_N_INSNS (ix86_cost->shift_var);
14713 if (FLOAT_MODE_P (mode))
14714 *total = COSTS_N_INSNS (ix86_cost->fmul);
14715 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
14717 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
14720 for (nbits = 0; value != 0; value >>= 1)
14723 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
14724 + nbits * ix86_cost->mult_bit);
14728 /* This is arbitrary */
14729 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
14730 + 7 * ix86_cost->mult_bit);
14738 if (FLOAT_MODE_P (mode))
14739 *total = COSTS_N_INSNS (ix86_cost->fdiv);
14741 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
14745 if (FLOAT_MODE_P (mode))
14746 *total = COSTS_N_INSNS (ix86_cost->fadd);
14747 else if (!TARGET_DECOMPOSE_LEA
14748 && GET_MODE_CLASS (mode) == MODE_INT
14749 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
14751 if (GET_CODE (XEXP (x, 0)) == PLUS
14752 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14753 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
14754 && CONSTANT_P (XEXP (x, 1)))
14756 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
14757 if (val == 2 || val == 4 || val == 8)
14759 *total = COSTS_N_INSNS (ix86_cost->lea);
14760 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
14761 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
14763 *total += rtx_cost (XEXP (x, 1), outer_code);
14767 else if (GET_CODE (XEXP (x, 0)) == MULT
14768 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
14770 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
14771 if (val == 2 || val == 4 || val == 8)
14773 *total = COSTS_N_INSNS (ix86_cost->lea);
14774 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
14775 *total += rtx_cost (XEXP (x, 1), outer_code);
14779 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14781 *total = COSTS_N_INSNS (ix86_cost->lea);
14782 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
14783 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
14784 *total += rtx_cost (XEXP (x, 1), outer_code);
14791 if (FLOAT_MODE_P (mode))
14793 *total = COSTS_N_INSNS (ix86_cost->fadd);
14801 if (!TARGET_64BIT && mode == DImode)
14803 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
14804 + (rtx_cost (XEXP (x, 0), outer_code)
14805 << (GET_MODE (XEXP (x, 0)) != DImode))
14806 + (rtx_cost (XEXP (x, 1), outer_code)
14807 << (GET_MODE (XEXP (x, 1)) != DImode)));
14813 if (FLOAT_MODE_P (mode))
14815 *total = COSTS_N_INSNS (ix86_cost->fchs);
14821 if (!TARGET_64BIT && mode == DImode)
14822 *total = COSTS_N_INSNS (ix86_cost->add * 2);
14824 *total = COSTS_N_INSNS (ix86_cost->add);
14828 if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
14833 if (FLOAT_MODE_P (mode))
14834 *total = COSTS_N_INSNS (ix86_cost->fabs);
14838 if (FLOAT_MODE_P (mode))
14839 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
14847 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
14849 ix86_svr3_asm_out_constructor (symbol, priority)
14851 int priority ATTRIBUTE_UNUSED;
14854 fputs ("\tpushl $", asm_out_file);
14855 assemble_name (asm_out_file, XSTR (symbol, 0));
14856 fputc ('\n', asm_out_file);
14862 static int current_machopic_label_num;
14864 /* Given a symbol name and its associated stub, write out the
14865 definition of the stub. */
14868 machopic_output_stub (file, symb, stub)
14870 const char *symb, *stub;
14872 unsigned int length;
14873 char *binder_name, *symbol_name, lazy_ptr_name[32];
14874 int label = ++current_machopic_label_num;
14876 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
14877 symb = (*targetm.strip_name_encoding) (symb);
14879 length = strlen (stub);
14880 binder_name = alloca (length + 32);
14881 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
14883 length = strlen (symb);
14884 symbol_name = alloca (length + 32);
14885 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
14887 sprintf (lazy_ptr_name, "L%d$lz", label);
14890 machopic_picsymbol_stub_section ();
14892 machopic_symbol_stub_section ();
14894 fprintf (file, "%s:\n", stub);
14895 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14899 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
14900 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
14901 fprintf (file, "\tjmp %%edx\n");
14904 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
14906 fprintf (file, "%s:\n", binder_name);
14910 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
14911 fprintf (file, "\tpushl %%eax\n");
14914 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
14916 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
14918 machopic_lazy_symbol_ptr_section ();
14919 fprintf (file, "%s:\n", lazy_ptr_name);
14920 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
14921 fprintf (file, "\t.long %s\n", binder_name);
14923 #endif /* TARGET_MACHO */
14925 /* Order the registers for register allocator. */
14928 x86_order_regs_for_local_alloc ()
14933 /* First allocate the local general purpose registers. */
14934 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14935 if (GENERAL_REGNO_P (i) && call_used_regs[i])
14936 reg_alloc_order [pos++] = i;
14938 /* Global general purpose registers. */
14939 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
14940 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
14941 reg_alloc_order [pos++] = i;
14943 /* x87 registers come first in case we are doing FP math
14945 if (!TARGET_SSE_MATH)
14946 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14947 reg_alloc_order [pos++] = i;
14949 /* SSE registers. */
14950 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
14951 reg_alloc_order [pos++] = i;
14952 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
14953 reg_alloc_order [pos++] = i;
14955 /* x87 registers. */
14956 if (TARGET_SSE_MATH)
14957 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
14958 reg_alloc_order [pos++] = i;
14960 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
14961 reg_alloc_order [pos++] = i;
14963 /* Initialize the rest of array as we do not allocate some registers
14965 while (pos < FIRST_PSEUDO_REGISTER)
14966 reg_alloc_order [pos++] = 0;
14969 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
14970 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
14973 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
14974 struct attribute_spec.handler. */
14976 ix86_handle_struct_attribute (node, name, args, flags, no_add_attrs)
14979 tree args ATTRIBUTE_UNUSED;
14980 int flags ATTRIBUTE_UNUSED;
14981 bool *no_add_attrs;
14984 if (DECL_P (*node))
14986 if (TREE_CODE (*node) == TYPE_DECL)
14987 type = &TREE_TYPE (*node);
14992 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
14993 || TREE_CODE (*type) == UNION_TYPE)))
14995 warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
14996 *no_add_attrs = true;
14999 else if ((is_attribute_p ("ms_struct", name)
15000 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15001 || ((is_attribute_p ("gcc_struct", name)
15002 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15004 warning ("`%s' incompatible attribute ignored",
15005 IDENTIFIER_POINTER (name));
15006 *no_add_attrs = true;
15013 ix86_ms_bitfield_layout_p (record_type)
15016 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
15017 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
15018 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
15021 /* Returns an expression indicating where the this parameter is
15022 located on entry to the FUNCTION. */
15025 x86_this_parameter (function)
15028 tree type = TREE_TYPE (function);
15032 int n = aggregate_value_p (TREE_TYPE (type)) != 0;
15033 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
15036 if (ix86_fntype_regparm (type) > 0)
15040 parm = TYPE_ARG_TYPES (type);
15041 /* Figure out whether or not the function has a variable number of
15043 for (; parm; parm = TREE_CHAIN (parm))
15044 if (TREE_VALUE (parm) == void_type_node)
15046 /* If not, the this parameter is in %eax. */
15048 return gen_rtx_REG (SImode, 0);
15051 if (aggregate_value_p (TREE_TYPE (type)))
15052 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
15054 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
15057 /* Determine whether x86_output_mi_thunk can succeed. */
15060 x86_can_output_mi_thunk (thunk, delta, vcall_offset, function)
15061 tree thunk ATTRIBUTE_UNUSED;
15062 HOST_WIDE_INT delta ATTRIBUTE_UNUSED;
15063 HOST_WIDE_INT vcall_offset;
15066 /* 64-bit can handle anything. */
15070 /* For 32-bit, everything's fine if we have one free register. */
15071 if (ix86_fntype_regparm (TREE_TYPE (function)) < 3)
15074 /* Need a free register for vcall_offset. */
15078 /* Need a free register for GOT references. */
15079 if (flag_pic && !(*targetm.binds_local_p) (function))
15082 /* Otherwise ok. */
15086 /* Output the assembler code for a thunk function. THUNK_DECL is the
15087 declaration for the thunk function itself, FUNCTION is the decl for
15088 the target function. DELTA is an immediate constant offset to be
15089 added to THIS. If VCALL_OFFSET is nonzero, the word at
15090 *(*this + vcall_offset) should be added to THIS. */
15093 x86_output_mi_thunk (file, thunk, delta, vcall_offset, function)
15094 FILE *file ATTRIBUTE_UNUSED;
15095 tree thunk ATTRIBUTE_UNUSED;
15096 HOST_WIDE_INT delta;
15097 HOST_WIDE_INT vcall_offset;
15101 rtx this = x86_this_parameter (function);
15104 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
15105 pull it in now and let DELTA benefit. */
15108 else if (vcall_offset)
15110 /* Put the this parameter into %eax. */
15112 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15113 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15116 this_reg = NULL_RTX;
15118 /* Adjust the this parameter by a fixed constant. */
15121 xops[0] = GEN_INT (delta);
15122 xops[1] = this_reg ? this_reg : this;
15125 if (!x86_64_general_operand (xops[0], DImode))
15127 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15129 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15133 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15136 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15139 /* Adjust the this parameter by a value stored in the vtable. */
15143 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15145 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15147 xops[0] = gen_rtx_MEM (Pmode, this_reg);
15150 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15152 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15154 /* Adjust the this parameter. */
15155 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
15156 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15158 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15159 xops[0] = GEN_INT (vcall_offset);
15161 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15162 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
15164 xops[1] = this_reg;
15166 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15168 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15171 /* If necessary, drop THIS back to its stack slot. */
15172 if (this_reg && this_reg != this)
15174 xops[0] = this_reg;
15176 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15179 xops[0] = DECL_RTL (function);
15182 if (!flag_pic || (*targetm.binds_local_p) (function))
15183 output_asm_insn ("jmp\t%P0", xops);
15186 tmp = XEXP (xops[0], 0);
15187 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, tmp), UNSPEC_GOTPCREL);
15188 tmp = gen_rtx_CONST (Pmode, tmp);
15189 tmp = gen_rtx_MEM (QImode, tmp);
15191 output_asm_insn ("jmp\t%A0", xops);
15196 if (!flag_pic || (*targetm.binds_local_p) (function))
15197 output_asm_insn ("jmp\t%P0", xops);
15202 char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
15203 tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
15204 tmp = gen_rtx_MEM (QImode, tmp);
15206 output_asm_insn ("jmp\t%0", xops);
15209 #endif /* TARGET_MACHO */
15211 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15212 output_set_got (tmp);
15215 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15216 output_asm_insn ("jmp\t{*}%1", xops);
15222 x86_field_alignment (field, computed)
15226 enum machine_mode mode;
15227 tree type = TREE_TYPE (field);
15229 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
15231 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15232 ? get_inner_array_type (type) : type);
15233 if (mode == DFmode || mode == DCmode
15234 || GET_MODE_CLASS (mode) == MODE_INT
15235 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
15236 return MIN (32, computed);
15240 /* Output assembler code to FILE to increment profiler label # LABELNO
15241 for profiling a function entry. */
15243 x86_function_profiler (file, labelno)
15245 int labelno ATTRIBUTE_UNUSED;
15250 #ifndef NO_PROFILE_COUNTERS
15251 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
15253 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
15257 #ifndef NO_PROFILE_COUNTERS
15258 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15260 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15264 #ifndef NO_PROFILE_COUNTERS
15265 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15266 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15268 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
15272 #ifndef NO_PROFILE_COUNTERS
15273 fprintf (file, "\tmovl\t$%sP%d,%%$%s\n", LPREFIX, labelno,
15274 PROFILE_COUNT_REGISTER);
15276 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15280 /* Implement machine specific optimizations.
15281 At the moment we implement single transformation: AMD Athlon works faster
15282 when RET is not destination of conditional jump or directly preceded
15283 by other jump instruction. We avoid the penalty by inserting NOP just
15284 before the RET instructions in such cases. */
15286 x86_machine_dependent_reorg (first)
15287 rtx first ATTRIBUTE_UNUSED;
15291 if (!TARGET_ATHLON_K8 || !optimize || optimize_size)
15293 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
15295 basic_block bb = e->src;
15298 bool insert = false;
15300 if (!returnjump_p (ret) || !maybe_hot_bb_p (bb))
15302 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15303 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
15305 if (prev && GET_CODE (prev) == CODE_LABEL)
15308 for (e = bb->pred; e; e = e->pred_next)
15309 if (EDGE_FREQUENCY (e) && e->src->index >= 0
15310 && !(e->flags & EDGE_FALLTHRU))
15315 prev = prev_active_insn (ret);
15316 if (prev && GET_CODE (prev) == JUMP_INSN
15317 && any_condjump_p (prev))
15319 /* Empty functions get branch misspredict even when the jump destination
15320 is not visible to us. */
15321 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
15325 emit_insn_before (gen_nop (), ret);
15329 /* Return nonzero when QImode register that must be represented via REX prefix
15332 x86_extended_QIreg_mentioned_p (insn)
15336 extract_insn_cached (insn);
15337 for (i = 0; i < recog_data.n_operands; i++)
15338 if (REG_P (recog_data.operand[i])
15339 && REGNO (recog_data.operand[i]) >= 4)
15344 /* Return nonzero when P points to register encoded via REX prefix.
15345 Called via for_each_rtx. */
15347 extended_reg_mentioned_1 (p, data)
15349 void *data ATTRIBUTE_UNUSED;
15351 unsigned int regno;
15354 regno = REGNO (*p);
15355 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15358 /* Return true when INSN mentions register that must be encoded using REX
15361 x86_extended_reg_mentioned_p (insn)
15364 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15367 #include "gt-i386.h"